PureCN: Copy number calling and SNV classification using targeted short read sequencing

suppressPackageStartupMessages(library(optparse))
suppressPackageStartupMessages(library(futile.logger))

### Parsing command line ------------------------------------------------------
option_list <- list(
    make_option(c("-i", "--sampleid"), action = "store", type = "character",
                default = NULL, help = "Sample id"),
    make_option(c("--normal"), action = "store", type = "character", default = NULL,
        help = "Input: normal coverage, GC-normalized. Optional if normaldb or seg-file is provided."),
    make_option(c("--tumor"), action = "store", type = "character",
                default = NULL, help = "Input: tumor coverage, GC-normalized"),
    make_option(c("--vcf"), action = "store", type = "character", default = NULL,
                help = "Input: VCF file. Read counts of common SNPs can be provided in GATK4 CollectAllelicCounts format (file suffix .tsv)."),
    make_option(c("--rds"), action = "store", type = "character", default = NULL,
        help = "Input: PureCN output RDS file, used to regenerate plots and files after manual curation"),
    make_option(c("--mapping-bias-file"), action = "store", type = "character", default = NULL,
        help = "Input: Mapping bias RDS file, generated by NormalDB.R (or calculateMappingBiasVcf)"),
    make_option(c("--normaldb"), action = "store", type = "character", default = NULL,
        help = "Input: NormalDB.rds file. Generated by NormalDB.R."),
    make_option(c("--seg-file"), action = "store", type = "character",
                default = NULL, help = "Input: Segmentation file"),
    make_option(c("--log-ratio-file"), action = "store", type = "character",
                default = NULL, help = "Input: Log2 copy number ratio file"),
    make_option(c("--additional-tumors"), action = "store", type = "character",
                default = NULL, help = "Input: tumor coverages from additional biopsies from the SAME patient, GC-normalized"),
    make_option(c("--sex"), action = "store", type = "character",
        default = formals(PureCN::runAbsoluteCN)$sex[[2]],
        help = "Input: Sex of sample. ? (detect), diplod (non-diploid chromosomes removed), F or M [default %default]"),
    make_option(c("--genome"), action = "store", type = "character",
                default = NULL, help = "Assay: Genome version. Use hg18, hg19, or hg38 for human [default %default]"),
    make_option(c("--intervals"), action = "store", type = "character", default = NULL,
        help = "Assay: Interval file as generated by IntervalFile.R"),
    make_option(c("--stats-file"), action = "store", type = "character", default = NULL,
        help = "VCF Filter: MuTect stats file, used to filter artifacts"),
    make_option(c("--min-af"), action = "store", type = "double", default = 0.03,
        help = "VCF Filter: minimum allelic fraction [default %default]"),
    make_option(c("--snp-blacklist"), action = "store", type = "character", default = NULL,
        help = "VCF Filter: File parsable by rtracklayer that defines blacklisted regions"),
    make_option(c("--error"), action = "store", type = "double",
        default = formals(PureCN::runAbsoluteCN)$error,
        help = "VCF Filter: Estimated default sequencing error rate for artifact filtering. Can be overriden by base quality scores [default %default]"),
    make_option(c("--base-quality-offset"), action = "store", type = "integer",
        default = formals(PureCN::filterVcfBasic)$base.quality.offset,
        help = "VCF Filter: Subtracts the specified value from the base quality score [default %default]"),
    make_option(c("--min-base-quality"), action = "store", type = "integer",
        default = formals(PureCN::filterVcfBasic)$min.base.quality,
        help = "VCF Filter: Minimum base quality for variants. Set to 0 to turn off filter [default %default]"),
    make_option(c("--min-supporting-reads"), action = "store", type = "integer",
        default = formals(PureCN::filterVcfBasic)$min.supporting.reads,
        help = "VCF Filter: Instead of calculating the min. number of supporting reads, use specified one [default %default]"),
    make_option(c("--db-info-flag"), action = "store", type = "character",
        default = formals(PureCN::runAbsoluteCN)$DB.info.flag,
        help = "VCF Filter: VCF INFO flag indicating presence in common germline databases [default %default]"),
    make_option(c("--popaf-info-field"), action = "store", type = "character",
        default = formals(PureCN::runAbsoluteCN)$POPAF.info.field,
        help = "VCF Filter: VCF INFO field providing population allele frequency [default %default]"),
    make_option(c("--cosmic-cnt-info-field"), action = "store", type = "character",
        default = formals(PureCN::runAbsoluteCN)$Cosmic.CNT.info.field,
        help = "VCF Filter: VCF INFO field providing counts in the Cosmic database [default %default]"),
    make_option(c("--cosmic-vcf-file"), action = "store", type = "character", default = NULL,
        help = "VCF Filter: Adds a Cosmic.CNT INFO annotation using a Cosmic VCF. Added for convenience, we recommend adding annotations upstream [default %default]"),
    make_option(c("--min-cosmic-cnt"), action = "store", type = "integer",
        default = formals(PureCN::setPriorVcf)$min.cosmic.cnt,
        help = "VCF Filter: Min number of COSMIC hits [default %default]"),
    make_option(c("--interval-padding"), action = "store", type = "integer",
        default = formals(PureCN::filterVcfBasic)$interval.padding,
        help = "VCF Filter: Keep variants in the flanking region of specified size [default %default]"),
    make_option(c("--min-total-counts"), action = "store", type = "integer",
        default = formals(PureCN::filterIntervals)$min.total.counts,
        help = "Interval Filter: Keep only intervals with at least that many counts in both tumor and (tanget) normal [default %default]"),
    make_option(c("--min-fraction-offtarget"), action = "store", type = "double",
        default = formals(PureCN::filterIntervals)$min.fraction.offtarget,
        help = "Interval Filter: Ignore off-target internals when only the specified fraction of all intervals are off-target intervals  [default %default]"),
    make_option(c("--num-eigen-vectors"), action = "store", type = "integer",
        default = formals(PureCN::calculateTangentNormal)$num.eigen,
        help = "Coverage Normalization: Number of eigen vectors when --normaldb is provided [default %default]"),
    make_option(c("--fun-segmentation"), action = "store", type = "character", default = "CBS",
        help = "Segmentation: Algorithm. CBS, PSCBS, GATK4, Hclust, or none [default %default]"),
    make_option(c("--alpha"), action = "store", type = "double",
        default = formals(PureCN::segmentationCBS)$alpha,
        help = "Segmentation: significance of breakpoints [default %default]"),
    make_option(c("--undo-sd"), action = "store", type = "double",
        default = formals(PureCN::segmentationCBS)$undo.SD,
        help = "Segmentation: DNAcopy undo.SD argument. If NULL, tries to find a sensible default [default %default]."),
    make_option(c("--changepoints-penalty"), action = "store", type = "double",
        default = formals(PureCN::segmentationGATK4)$changepoints.penalty,
        help = "Segmentation: GATK4 ModelSegments --number-of-changepoints-penalty-factor argument. If NULL, tries to find a sensible default [default %default]."),
    make_option(c("--additional-cmd-args"), action = "store", type = "character",
        default = formals(PureCN::segmentationGATK4)$additional.cmd.args,
        help = "Segmentation: Used in GATK4 segmentation function to add additional ModelSegments arguments [default %default]"),
    make_option(c("--max-segments"), action = "store", type = "double",
        default = formals(PureCN::runAbsoluteCN)$max.segments,
        help = "Segmentation: Flag noisy samples with many segments [default %default]"),
    make_option(c("--min-logr-sdev"), action = "store", type = "double",
        default = formals(PureCN::runAbsoluteCN)$min.logr.sdev,
        help = "Segmentation: Set minimum log-ratio standard deviation to this value. Useful when uncorrected biases exceed the log-ratio noise [default %default]."),
    make_option(c("--min-purity"), action = "store", type = "double",
        default = formals(PureCN::runAbsoluteCN)$test.purity[[2]],
        help = "Minimum considered purity [default %default]"),
    make_option(c("--max-purity"), action = "store", type = "double",
        default = formals(PureCN::runAbsoluteCN)$test.purity[[3]],
        help = "Maximum considered purity [default %default]"),
    make_option(c("--min-ploidy"), action = "store", type = "double",
        default = formals(PureCN::runAbsoluteCN)$min.ploidy,
        help = "Minimum considered ploidy [default %default]"),
    make_option(c("--max-ploidy"), action = "store", type = "double",
        default = formals(PureCN::runAbsoluteCN)$max.ploidy,
        help = "Maximum considered ploidy [default %default]"),
    make_option(c("--max-copy-number"), action = "store", type = "double",
        default =  max(eval(formals(PureCN::runAbsoluteCN)$test.num.copy)),
        help = "Maximum allele-specific integer copy number, only used for fitting allele-specific copy numbers. Higher copy numbers are still be inferred and reported [default %default]"),
    make_option(c("--post-optimize"), action = "store_true", default = FALSE,
        help = "Post-optimization [default %default]"),
    make_option(c("--bootstrap-n"), action = "store", type = "integer", default = 0,
        help = "Number of bootstrap replicates [default %default]"),
    make_option(c("--speedup-heuristics"), action = "store", type = "integer",
        default =  max(eval(formals(PureCN::runAbsoluteCN)$speedup.heuristics)),
        help = "Tries to avoid spending computation time on unlikely local optima [default %default]"),
    make_option(c("--model-homozygous"), action = "store_true", default = FALSE,
        help = "Model homozygous variants in very pure samples [default %default]"),
    make_option(c("--model"), action = "store", type = "character",
        default = formals(PureCN::runAbsoluteCN)$model[[2]],
        help = "Model used to fit variants. Either beta or betabin [default %default]."),
    make_option(c("--log-ratio-calibration"), action = "store", type = "double",
        default = formals(PureCN::runAbsoluteCN)$log.ratio.calibration,
        help = "Parameter defining the extend to which log-ratios might be miscalibrated [default %default]"),
    make_option(c("--max-non-clonal"), action = "store", type = "double",
        default = formals(PureCN::runAbsoluteCN)$max.non.clonal,
        help = "Maximum genomic fraction assigned to a subclonal copy number state [default %default]"),
    make_option(c("--max-homozygous-loss"), action = "store", type = "character",
        default = paste(formals(PureCN::runAbsoluteCN)$max.homozygous.loss[2:3], collapse = ","),
        help = "Maximum genomic fraction assigned to a complete loss and maximum size of a loss in bp [default %default]"),
    make_option(c("--out-vcf"), action = "store_true", default = FALSE,
        help = "Output: Annotate input VCF with posterior probabilities. Otherwise only produce CSV file."),
    make_option(c("--out"), action = "store", type = "character", default = NULL,
        help = paste("Output: File name prefix to which results should be written.",
        "If out is a directory, will use out/sampleid.")),
    make_option(c("--rds-version"), action = "store", type = "integer", default = NULL,
        help = paste("Output: RDS files serialized with workspace version.",
        "Default uses the saveRDS default. To get R versions prior to 3.6.0 being able to read, use --rds-version=2.")),
    make_option(c("--seed"), action = "store", type = "integer", default = NULL,
        help = "Seed for random number generator [default %default]"),
    make_option(c("--parallel"), action = "store_true", default = FALSE,
        help = "Use BiocParallel to fit local optima in parallel."),
    make_option(c("--cores"), action = "store", type = "integer", default = NULL,
        help = "Use BiocParallel MulticoreParam backend with the specified number of worker cores (--parallel uses the default BiocParallel backend instead)."),
    make_option(c("-v", "--version"), action = "store_true", default = FALSE,
        help = "Print PureCN version"),
    make_option(c("-f", "--force"), action = "store_true", default = FALSE,
        help = "Overwrite existing files")
)

alias_list <- list(
    "mappingbiasfile" = "mapping-bias-file",
    "segfile" = "seg-file",
    "logratiofile" = "log-ratio-file",
    "additionaltumors" = "additional-tumors",
    "statsfile" = "stats-file",
    "minaf" = "min-af",
    "snpblacklist" = "snp-blacklist",
    "dbinfoflag" = "db-info-flag",
    "popafinfofield" = "popaf-info-field",
    "mincosmiccnt" = "min-cosmic-cnt",
    "padding" = "interval-padding",
    "mintotalcounts"  = "min-total-counts",
    "minfractionofftarget" = "min-fraction-offtarget",
    "funsegmentation" = "fun-segmentation",
    "undosd" = "undo-sd",
    "changepointspenalty" = "changepoints-penalty",
    "additionalcmdargs" = "additional-cmd-args",
    "maxsegments" = "max-segments",
    "minlogrsdev" = "min-logr-sdev",
    "minpurity" = "min-purity",
    "maxpurity" = "max-purity",
    "minploidy" = "min-ploidy",
    "maxploidy" = "max-ploidy",
    "maxcopynumber" = "max-copy-number",
    "postoptimize" = "post-optimize",
    "bootstrapn" = "bootstrap-n",
    "modelhomozygous" = "model-homozygous",
    "logratiocalibration" = "log-ratio-calibration",
    "maxnonclonal" = "max-non-clonal",
    "maxhomozygousloss" = "max-homozygous-loss",
    "speedupheuristics" = "speedup-heuristics",
    "outvcf" = "out-vcf"
)
replace_alias <- function(x, deprecated = TRUE) {
    idx <- match(x, paste0("--", names(alias_list)))
    if (any(!is.na(idx))) {
        replaced <- paste0("--", alias_list[na.omit(idx)])
        x[!is.na(idx)] <- replaced
        if (deprecated) {
            flog.warn("Deprecated arguments, use %s instead.", paste(replaced, collapse = " "))
        }
    }
    return(x)
}
    
opt <- parse_args(OptionParser(option_list = option_list),
    args = replace_alias(commandArgs(trailingOnly = TRUE)),
    convert_hyphens_to_underscores = TRUE)

if (opt$version) {
    message(as.character(packageVersion("PureCN")))
    q(status = 0)
}

if (!is.null(opt$seed)) {
    set.seed(opt$seed)
}

tumor.coverage.file <- opt[["tumor"]]
normal.coverage.file <- opt[["normal"]]
snp.blacklist <- opt$snp_blacklist

if (!is.null(snp.blacklist)) {
    snp.blacklist <- strsplit(snp.blacklist, ",")[[1]]
}

seg.file <- opt$seg_file
log.ratio <- opt$log_ratio_file
sampleid <- opt$sampleid
out <- opt[["out"]]
file.rds <- opt$rds
normalDB <- NULL
BPPARAM <- NULL

.getFilePrefix <- function(out, sampleid) {
    isDir <- file.info(out)$isdir
    if (!is.na(isDir) && isDir) return(file.path(out, sampleid))
    out
}

if (!is.null(file.rds) && file.exists(file.rds)) {
    if (is.null(out)) out <- sub(".rds$", "", file.rds)
} else {
    if (is.null(sampleid)) stop("Need --sampleid.")
    if (is.null(opt$genome)) stop("Need --genome.")
    out <- .getFilePrefix(out, sampleid)
    file.rds <- paste0(out, ".rds")
    if (is.null(seg.file)) {
        if (is.null(tumor.coverage.file)) stop("Need either --tumor or --segfile.")
        tumor.coverage.file <- normalizePath(tumor.coverage.file,
            mustWork = TRUE)
    }
}
    
normalizePath(dirname(out), mustWork = TRUE)
if (file.access(dirname(out), 2) < 0) stop("Permission denied to write in --out.")

flog.info("Loading PureCN %s...", Biobase::package.version("PureCN"))
suppressPackageStartupMessages(library(PureCN))
library(futile.logger)

debug <- FALSE
if (Sys.getenv("PURECN_DEBUG") != "") {
    flog.threshold("DEBUG")
    debug <- TRUE
}

.checkFileList <- function(file) {
    files <- read.delim(file, as.is = TRUE, header = FALSE)[, 1]
    numExists <- sum(file.exists(files), na.rm = TRUE)
    if (numExists < length(files)) {
        stop("File not exists in file ", file)
    }
    files
}

### Run PureCN ----------------------------------------------------------------

if (file.exists(file.rds) && !opt$force) {
    flog.info("%s already exists. Skipping... (--force will overwrite)",
        file.rds)
    ret <- readCurationFile(file.rds)
    if (is.null(sampleid)) sampleid <- ret$input$sampleid
} else {
    if (!is.null(opt$normaldb)) {
        normalDB <- readRDS(opt$normaldb)
        if (!is.null(normal.coverage.file)) {
            flog.warn("Both --normal and --normalDB provided. normalDB will NOT be used for coverage denoising. You probably do not want this.")
        }
    }

    .getNormalCoverage <- function(normal.coverage.file) {
        if (!is.null(normalDB)) {
            if (is.null(normal.coverage.file)) {
                normal.coverage.file <- calculateTangentNormal(tumor.coverage.file,
                    normalDB, num.eigen = opt[["num_eigen_vectors"]])
            }
        } else if (is.null(normal.coverage.file) && is.null(seg.file) &&
                   is.null(log.ratio)) {
            stop("Need either normalDB or normal.coverage.file")
        }
        normal.coverage.file
    }
    normal.coverage.file <- .getNormalCoverage(normal.coverage.file)
    file.log <- paste0(out, ".log")

    pdf(paste0(out, "_segmentation.pdf"), width = 10, height = 11)
    if (!is.null(opt$additional_tumors)) {
        if (!is.null(seg.file)) {
            stop("--additional-tumors overwrites --seg-file")
        }
        seg.file <- paste0(out, "_multisample.seg")
        if (grepl(".list$", opt$additional_tumors)) {
            additional.tumors <- .checkFileList(opt$additional_tumors)
        } else {
            additional.tumors <- opt$additional_tumors
        }
        multi.seg <- processMultipleSamples(
            c(list(tumor.coverage.file), as.list(additional.tumors)),
            sampleids = c(sampleid, paste(sampleid,
                seq_along(additional.tumors) + 1, sep = "_")),
            normalDB = normalDB, genome = opt$genome, verbose = debug)
        write.table(multi.seg, seg.file, row.names = FALSE, sep = "\t")
    }
    af.range <- c(opt$min_af, 1 - opt$min_af)
    test.purity <- seq(opt$min_purity, opt$max_purity, by = 0.01)

    uses.recommended.fun <- FALSE
    recommended.fun <- "Hclust"
    if (is.null(seg.file)) {
        if (!is.null(opt$vcf)) {
            recommended.fun <- "PSCBS"
        } else {
            recommended.fun <- "CBS"
        }
    }
    fun.segmentation <- segmentationCBS
    if (opt$fun_segmentation != "CBS") {
        if (opt$fun_segmentation == "PSCBS") {
            fun.segmentation <- segmentationPSCBS
            if (is.null(seg.file)) uses.recommended.fun <- TRUE
        } else if (opt$fun_segmentation == "Hclust") {
            fun.segmentation <- segmentationHclust
            if (!is.null(seg.file)) uses.recommended.fun <- TRUE
        } else if (opt$fun_segmentation == "GATK4") {
            fun.segmentation <- segmentationGATK4
        } else if (opt$fun_segmentation == "none") {
            fun.segmentation <- function(seg, ...) seg
        } else {
            stop("Unknown segmentation function")
        }
    } else if (is.null(opt$vcf)) {
        uses.recommended.fun <- TRUE
    }

    if (!uses.recommended.fun) {
        flog.warn("Recommended to provide --fun-segmentation %s.", recommended.fun)
    }

    mutect.ignore <- eval(formals(PureCN::filterVcfMuTect)$ignore)
    if (opt$error < formals(PureCN::runAbsoluteCN)$error && !is.null(opt$stats_file)) {
        flog.info("Low specified error, will keep fstar_tumor_lod flagged variants")
        mutect.ignore <- mutect.ignore[-match("fstar_tumor_lod", mutect.ignore)]
    }
    if (!is.null(opt$cores) && opt$cores > 1) {
        suppressPackageStartupMessages(library(BiocParallel))
        BPPARAM <- MulticoreParam(workers = opt$cores)
        bplog(BPPARAM) <- debug
        flog.info("Using BiocParallel MulticoreParam backend with %s cores.", opt$cores)
    } else if (opt$parallel) {
        suppressPackageStartupMessages(library(BiocParallel))
        BPPARAM <- bpparam()
        bplog(BPPARAM) <- debug
        flog.info("Using default BiocParallel backend. You can change the default in your ~/.Rprofile file.")
    }
    if (!is.null(log.ratio)) {
        flog.info("Reading %s...", log.ratio)
        log.ratio <- readLogRatioFile(log.ratio)
        if (!is.null(tumor.coverage.file)) {
            flog.info("Reading %s...", tumor.coverage.file)
            tumor.coverage.file <- readCoverageFile(tumor.coverage.file)
            # GATK4 log-ratio files are filtered for quality, so make sure that
            # coverage files align
            tumor.coverage.file <- subsetByOverlaps(tumor.coverage.file, log.ratio)
        }
        if (!is.null(normal.coverage.file)) {
            # With GATK4 log-ratios,  normal samples need to be filtered as well
            # as the tumor samples, or they won't align
            normal.coverage.file <- subsetByOverlaps(normal.coverage.file, log.ratio)
        }
        log.ratio <- log.ratio$log.ratio
    }
    vcf <- opt$vcf
    if (!is.null(vcf) && tools::file_ext(vcf) == "tsv") {
        flog.info("*.tsv file provided for --vcf, assuming GATK4 CollectAllelicCounts format")
        vcf <- readAllelicCountsFile(vcf)
    }
    args.segmentation <- list(
        alpha = opt$alpha, undo.SD = opt$undo_sd,
        additional.cmd.args = opt$additional_cmd_args
    )
    if (opt$fun_segmentation == "GATK4" && !is.null(opt$changepoints_penalty)) {
        args.segmentation$changepoint.penalty <- opt$changepoints_penalty
    }
    ret <- runAbsoluteCN(normal.coverage.file = normal.coverage.file,
            tumor.coverage.file = tumor.coverage.file, vcf.file = vcf,
            sampleid = sampleid, plot.cnv = TRUE,
            interval.file = opt$intervals,
            genome = opt$genome, seg.file = seg.file,
            log.ratio = log.ratio,
            test.purity = test.purity,
            test.num.copy = seq(0, opt$max_copy_number),
            sex = opt$sex,
            args.filterVcf = list(snp.blacklist = snp.blacklist,
                af.range = af.range, stats.file = opt$stats_file,
                min.supporting.reads = opt$min_supporting_reads,
                base.quality.offset = opt$base_quality_offset,
                min.base.quality = opt$min_base_quality,
                ignore = mutect.ignore,
                interval.padding = opt$interval_padding),
            args.filterIntervals = list(
                min.total.counts = opt$min_total_counts,
                min.fraction.offtarget = opt$min_fraction_offtarget
            ),
            fun.segmentation = fun.segmentation,
            args.segmentation = args.segmentation,
            args.setMappingBiasVcf =
                list(mapping.bias.file = opt$mapping_bias_file),
            args.setPriorVcf = list(min.cosmic.cnt = opt$min_cosmic_cnt),
            normalDB = normalDB, model.homozygous = opt$model_homozygous,
            min.ploidy = opt$min_ploidy, max.ploidy = opt$max_ploidy,
            model = opt[["model"]], log.file = file.log,
            max.segments = opt$max_segments,
            min.logr.sdev = opt$min_logr_sdev,
            error = opt$error, DB.info.flag = opt$db_info_flag,
            POPAF.info.field = opt$popaf_info_field,
            Cosmic.CNT.info.field = opt$cosmic_cnt_info_field,
            log.ratio.calibration = opt$log_ratio_calibration,
            max.non.clonal = opt$max_non_clonal,
            max.homozygous.loss = as.numeric(strsplit(opt$max_homozygous_loss, ",")[[1]]),
            post.optimize = opt$post_optimize,
            speedup.heuristics = opt$speedup_heuristics,
            vcf.field.prefix = "PureCN.",
            cosmic.vcf.file = opt$cosmic_vcf_file,
            BPPARAM = BPPARAM)
    # free memory
    vcf <- NULL
    invisible(dev.off())
    if (opt$bootstrap_n > 0) {
        ret <- bootstrapResults(ret, n = opt$bootstrap_n)
    }
    saveRDS(ret, file = file.rds, version = opt[["rds_version"]])
}

### Create output files -------------------------------------------------------

curationFile <- createCurationFile(file.rds)
if (debug) {
    curationFile$log.ratio.offset <- mean(ret$results[[1]]$log.ratio.offset)
    curationFile$log.ratio.sdev <- ret$input$log.ratio.sdev
    curationFile$num.segments <- nrow(ret$results[[1]]$seg)
    write.csv(curationFile, file = paste0(out, "_debug.csv"),
        row.names = FALSE)
}

flog.info("Generating output files...")
file.pdf <- paste0(out, ".pdf")
pdf(file.pdf, width = 10, height = 11)
plotAbs(ret, type = "all")
invisible(dev.off())

file.pdf <- paste0(out, "_local_optima.pdf")
pdf(file.pdf, width = 5, height = 5)
plotAbs(ret, 1, type = "overview")
invisible(dev.off())

file.seg <- paste0(out, "_dnacopy.seg")
seg <- ret$results[[1]]$seg
seg <- seg[, c(1:6, match("C", colnames(seg)))]
write.table(seg, file = file.seg, sep = "\t", quote = FALSE,
    row.names = FALSE)

if (is(ret$results[[1]]$gene.calls, "data.frame")) {
    file.genes <- paste0(out, "_genes.csv")
    allAlterations <- callAlterations(ret, all.genes = TRUE)

    write.csv(cbind(Sampleid = sampleid, gene.symbol = rownames(allAlterations),
        allAlterations), row.names = FALSE, file = file.genes, quote = FALSE)
    if (!is.null(opt$normaldb)) {
        if (is.null(normalDB)) normalDB <- readRDS(opt$normaldb)
        if (normalDB$version >= 8) {
            file.amps <- paste0(out, "_amplification_pvalues.csv")
            allAmplifications <- callAmplificationsInLowPurity(ret, normalDB,
                all.genes = TRUE, BPPARAM = BPPARAM)

            write.csv(cbind(Sampleid = sampleid, gene.symbol = rownames(allAmplifications),
                allAmplifications), row.names = FALSE, file = file.amps, quote = FALSE)
        }
    }
} else {
    flog.warn("--intervals does not contain gene symbols. Not generating gene-level calls.")
}
    
if (!is.null(ret$input$vcf) &&
    !is.null(ret$results[[1]]$SNV.posterior)) {
    if (opt$out_vcf) {
        file.vcf <- paste0(out, ".vcf")
        vcfanno <- predictSomatic(ret, return.vcf = TRUE)
        writeVcf(vcfanno, file = file.vcf)
        bgzip(file.vcf, paste0(file.vcf, ".gz"), overwrite = TRUE)
        indexTabix(paste0(file.vcf, ".gz"), format = "vcf")
    }
    file.csv <- paste0(out, "_variants.csv")
    write.csv(cbind(Sampleid = sampleid, predictSomatic(ret)), file = file.csv,
        row.names = FALSE, quote = FALSE)

    file.loh <- paste0(out, "_loh.csv")
    write.csv(cbind(Sampleid = sampleid, PureCN::callLOH(ret)), file = file.loh,
        row.names = FALSE, quote = FALSE)

    file.pdf <- paste0(out, "_chromosomes.pdf")
    pdf(file.pdf, width = 9, height = 10)
    chromosomes <- seqlevelsInUse(ret$input$vcf[ret$results[[1]]$SNV.posterior$vcf.ids])
    chromosomes <- chromosomes[orderSeqlevels(chromosomes)]
    for (chrom in chromosomes) {
        plotAbs(ret, 1, type = "BAF", chr = chrom)
    }
    invisible(dev.off())
}
lima1/PureCN documentation built on June 15, 2025, 5:30 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
lima1/PureCN
Copy number calling and SNV classification using targeted short read sequencing

inst/extdata/PureCN.R
In lima1/PureCN: Copy number calling and SNV classification using targeted short read sequencing

R Package Documentation

Browse R Packages

We want your feedback!

lima1/PureCN Copy number calling and SNV classification using targeted short read sequencing

inst/extdata/PureCN.R In lima1/PureCN: Copy number calling and SNV classification using targeted short read sequencing

R Package Documentation

Browse R Packages

We want your feedback!

lima1/PureCN
Copy number calling and SNV classification using targeted short read sequencing

inst/extdata/PureCN.R
In lima1/PureCN: Copy number calling and SNV classification using targeted short read sequencing