inst/extdata/Dx.R

suppressPackageStartupMessages(library(optparse))
suppressPackageStartupMessages(library(futile.logger))

### Parsing command line ------------------------------------------------------

option_list <- list(
    make_option(c("--rds"), action = "store", type = "character",
        default = NULL, help = "PureCN output RDS file"),
    make_option(c("--callable"), action = "store", type = "character",
        default = NULL,
        help = "TMB: File parsable by rtracklayer specifying all callable regions, e.g. generated by GATK CallableLoci"),
    make_option(c("--exclude"), action = "store", type = "character",
        default = NULL,
        help = paste("TMB: File parsable by rtracklayer specifying regions to exclude",
         "from mutation burden calculation, e.g. intronic regions")),
    make_option(c("--max-prior-somatic"), action = "store", type = "double",
        default = formals(PureCN::callMutationBurden)$max.prior.somatic,
        help = "TMB: Can be used to exclude hotspot mutations with high somatic prior probability [default %default]"),
    make_option(c("--keep-indels"), action = "store_true", default = FALSE, 
        help = "TMB: count indels"),
    make_option(c("--signatures"), action = "store_true", default = FALSE, 
        help="Attempt the deconstruction of COSMIC signatures (requires deconstructSigs package)"),
    make_option(c("--signature-databases"), action = "store", type = "character", 
        default = "signatures.exome.cosmic.v3.may2019", 
        help = "Use the specified signature databases provided by deconstrucSigs. To test multiple databases, provide them : separated [%default]."),
    make_option(c("--out"), action = "store", type = "character",
        default = NULL,
        help = "File name prefix to which results should be written"),
    make_option(c("-v", "--version"), action = "store_true", default = FALSE, 
        help = "Print PureCN version"),
    make_option(c("-f", "--force"), action = "store_true", default = FALSE, 
        help="Overwrite existing files")
)
alias_list <- list(
    "signature_databases" = "signature-databases",
    "maxpriorsomatic" = "max-prior-somatic",
    "keepindels" = "keep-indels"
)
replace_alias <- function(x, deprecated = TRUE) {
    idx <- match(x, paste0("--", names(alias_list)))
    if (any(!is.na(idx))) {
        replaced <- paste0("--", alias_list[na.omit(idx)])
        x[!is.na(idx)] <- replaced
        if (deprecated) {
            flog.warn("Deprecated arguments, use %s instead.", paste(replaced, collapse=" "))
        }
    }
    return(x)
}
    
opt <- parse_args(OptionParser(option_list = option_list),
    args = replace_alias(commandArgs(trailingOnly = TRUE)),
    convert_hyphens_to_underscores = TRUE)

if (opt$version) {
    message(as.character(packageVersion("PureCN")))
    q(status = 0)
}

# Parse input rds
infileRds <- opt$rds
if (is.null(infileRds)) stop("Need --rds")
infileRds <- normalizePath(infileRds, mustWork = TRUE)

# Parse both BED files restricting covered region
callableFile <- opt$callable
callable <- NULL
if (!is.null(callableFile)) {
    suppressPackageStartupMessages(library(rtracklayer))
    callableFile <- normalizePath(callableFile, mustWork = TRUE)
    flog.info("Reading %s...", callableFile)
    callable <- GRanges(import(callableFile))
}

excludeFile <- opt$exclude
exclude <- NULL
if (!is.null(excludeFile)) {
    suppressPackageStartupMessages(library(rtracklayer))
    excludeFile <- normalizePath(excludeFile, mustWork = TRUE)
    flog.info("Reading %s...", excludeFile)
    exclude <- GRanges(import(excludeFile))
}


### Run PureCN ----------------------------------------------------------------

flog.info("Loading PureCN...")
suppressPackageStartupMessages(library(PureCN))

res <- readCurationFile(infileRds)
sampleid <- res$input$sampleid

.getOutPrefix <- function(opt, infile, sampleid) {
    out <- opt[["out"]]
    if (is.null(out)) {
        if (!is.null(infile) && file.exists(infile)) {
            outdir <- dirname(infile)
            prefix <- sampleid
        } else {
            stop("Need --out")
        }    
    } else {
        outdir <- dirname(out)
        prefix <- basename(out)
    }    
    outdir <- normalizePath(outdir, mustWork = TRUE)
    outPrefix <- file.path(outdir, prefix)
}
outPrefix <- .getOutPrefix(opt, infileRds, sampleid)
     
outfileMb <- paste0(outPrefix, "_mutation_burden.csv")
if (!opt$force && file.exists(outfileMb)) {
    stop(outfileMb, " exists. Use --force to overwrite.")
}    

flog.info("Calling mutation burden...")

fun.countMutation <- eval(formals(callMutationBurden)$fun.countMutation)
if (opt$keep_indels) fun.countMutation <- function(vcf) width(vcf) >= 1
    
mb <- callMutationBurden(res, callable = callable, exclude = exclude,
        max.prior.somatic = opt$max_prior_somatic,
        fun.countMutation = fun.countMutation)

write.csv(cbind(Sampleid=sampleid, mb), file = outfileMb, row.names = FALSE,
          quote = FALSE)

flog.info("Calling chromosomal instability...")

cin <- data.frame(
    cin = callCIN(res, allele.specific = FALSE, reference.state = "normal"),
    cin.allele.specific = callCIN(res, reference.state = "normal"),
    cin.ploidy.robust = callCIN(res, allele.specific = FALSE),
    cin.allele.specific.ploidy.robust = callCIN(res)
)
outfileCin <- paste0(outPrefix, "_cin.csv")
if (!opt$force && file.exists(outfileCin)) {
    flog.warn("%s exists. Use --force to overwrite.", outfileCin)
} else {
    write.csv(cbind(Sampleid=sampleid, round(cin, digits = 4)), 
              file = outfileCin, row.names = FALSE, quote = FALSE)
}

if (opt$signatures && require(deconstructSigs)) {

    x <- predictSomatic(res)
    x$Sampleid <- res$input$sampleid

    flog.info("deconstructSigs package found.")

    if (compareVersion(package.version("deconstructSigs"), "1.9.0") < 0) {
        flog.fatal("deconstructSigs package is outdated. >= 1.9.0 required")
        q(status = 1)
    }    

    s <- x[ x$ML.SOMATIC & x$prior.somatic > 0.1 & !x$FLAGGED ,]

    genome <- genome(res$input$vcf)[1]
    bsg <- NULL
    if (genome != "hg19") { 
        bsgPkg <- paste0("BSgenome.Hsapiens.UCSC.", genome)
        if (!require(bsgPkg, character.only = TRUE)) { 
            flog.fatal("%s not found.", bsgPkg)
            q(status = 1)
        }
        bsg <- get(bsgPkg)
    }
    databases <- strsplit(opt$signature_databases, ":")[[1]]
    for (database in databases) {
        signatures.ref <- get(database)
        flog.info("Using %s signature database.", database)
        database_name <- if (length(databases) > 1) gsub("signatures.", "_", database) else ""
        outfile <- paste0(outPrefix, database_name, "_signatures.csv")
        outfile_pdf <- paste0(outPrefix, database_name, "_signatures.pdf")
        if (nrow(s) >= 10) {
            sig.type <- if (grepl("^signatures.dbs", database)[1]) "DBS" else "SBS"
            s[["REF"]] <- as.character(s[["REF"]])
            s[["ALT"]] <- as.character(s[["ALT"]])
            sigs.input <- mut.to.sigs.input(s, 
                            sample.id = "Sampleid", chr = "chr", pos = "start", 
                            ref = "REF", alt = "ALT", bsg = bsg, sig.type = sig.type,
                            dbs_table = dbs_possible)
            sigs <- whichSignatures(tumor.ref = sigs.input, 
                                    signatures.ref = signatures.ref, 
                                    contexts.needed = TRUE, 
                                    tri.counts.method = "default")
            pdf(outfile_pdf)
            plotSignatures(sigs, sig.type = sig.type)
            makePie(sigs)
            invisible(dev.off())
            write.csv(sigs$weights, file = outfile)
        } else {
            flog.warn("Not enough somatic calls to deconstruct signatures.")
            m <- data.frame(matrix(nrow=1, ncol=nrow(signatures.ref)))
            colnames(m) <- rownames(signatures.ref)
            rownames(m) <- sampleid
            write.csv(m, file = outfile)
        }     
    }
}
lima1/PureCN documentation built on April 3, 2024, 7:56 a.m.