R/setPriorVcf.R

Defines functions .annotateVcfPrior setPriorVcf

Documented in setPriorVcf

#' Set Somatic Prior VCF
#' 
#' Function to set prior for somatic mutation status for each variant in the
#' provided \code{CollapsedVCF} object.
#' 
#' 
#' @param vcf \code{CollapsedVCF} object, read in with the \code{readVcf}
#' function from the VariantAnnotation package.
#' @param prior.somatic Prior probabilities for somatic mutations. First value
#' is for the case when no matched normals are available and the variant is not
#' in germline databases (second value). Third value is for variants with MuTect
#' somatic call. Different from 1, because somatic mutations in segments of copy
#' number 0 have 0 probability and artifacts can thus have dramatic influence on
#' likelihood score. Forth value is for variants not labeled as somatic by
#' MuTect. Last two values are optional, if vcf contains a flag Cosmic.CNT, it
#' will set the prior probability for variants with CNT > 6 to the first of
#' those values in case of no matched normal available (0.995 default).  Final
#' value is for the case that variant is in both germline databases and
#' COSMIC count > 6.
#' @param tumor.id.in.vcf Id of tumor in case multiple samples are stored in
#' VCF.
#' @param min.cosmic.cnt Minimum number of hits in the COSMIC database to 
#' call variant as likely somatic.
#' @param DB.info.flag Flag in INFO of VCF that marks presence in common
#' germline databases. Defaults to \code{DB} that may contain somatic variants
#' if it is from an unfiltered germline database.
#' @param Cosmic.CNT.info.field Info field containing hits in the Cosmic database
#' @return The \code{vcf} with \code{numeric(nrow(vcf))} vector with the
#' prior probability of somatic status for each variant in the 
#' \code{CollapsedVCF} added to the \code{INFO} field \code{PR}.
#' @author Markus Riester
#' @examples
#' 
#' # This function is typically only called by runAbsoluteCN via the 
#' # fun.setPriorVcf and args.setPriorVcf comments.
#' vcf.file <- system.file("extdata", "example.vcf.gz", package="PureCN")
#' vcf <- readVcf(vcf.file, "hg19")
#' vcf <- setPriorVcf(vcf)        
#' 
#' @export setPriorVcf
setPriorVcf <- function(vcf, prior.somatic = c(0.5, 0.0005, 0.999, 0.0001,
                                               0.995, 0.5),
                        tumor.id.in.vcf = NULL, min.cosmic.cnt = 6, 
                        DB.info.flag = "DB", Cosmic.CNT.info.field = "Cosmic.CNT") {
    if (is.null(tumor.id.in.vcf)) {
        tumor.id.in.vcf <- .getTumorIdInVcf(vcf)
    }
    if (!is.null(info(vcf)$SOMATIC)) {
         tmp <- prior.somatic
         prior.somatic <- ifelse(info(vcf)$SOMATIC,
            prior.somatic[3],prior.somatic[4])

         flog.info("Found SOMATIC annotation in VCF.")
         flog.info("Setting somatic prior probabilities for somatic variants to %f or to %f otherwise.", 
            tmp[3], tmp[4])
    } else {
         tmp <- prior.somatic
         prior.somatic <- ifelse(info(vcf)[[DB.info.flag]],
            prior.somatic[2], prior.somatic[1])
         if (!is.null(info(vcf)[[Cosmic.CNT.info.field]])) {
             flog.info("Found COSMIC annotation in VCF. Requiring %i hits.", 
                min.cosmic.cnt)
             flog.info("Setting somatic prior probabilities for hits to %f or to %f if in both COSMIC and likely germline based on dbSNP membership or population allele frequency.", 
                tmp[5], tmp[6])

             prior.somatic[which(info(vcf)[[Cosmic.CNT.info.field]] >= min.cosmic.cnt)] <- tmp[5]
             prior.somatic[which(info(vcf)[[Cosmic.CNT.info.field]] >= min.cosmic.cnt & 
                info(vcf)[[DB.info.flag]])] <- tmp[6]
         } else {
             flog.info("Setting somatic prior probabilities for likely germline hits to %f or to %f otherwise.", 
                tmp[2], tmp[1])
         }      
    }     
    .annotateVcfPrior(vcf, prior.somatic)
}
.annotateVcfPrior <- function(vcf, prior.somatic) {
    key <- paste0(.getPureCNPrefixVcf(vcf), "PR")
    newInfo <- DataFrame(
        Number = 1, Type = "Float",
        Description = "Prior probability somatic",
        row.names = key)
    
    info(header(vcf)) <- rbind(info(header(vcf)), newInfo)
    info(vcf)[[key]] <- prior.somatic
    return(vcf)
}
lima1/PureCN documentation built on April 24, 2024, 8:23 p.m.