ComputePeaksVAF_nf: Compute Peaks VAF (for nf)

Usage Arguments Examples

View source: R/ComputePeaksVAF_nf.R

Usage

1
ComputePeaksVAF_nf(filename.ssm, filename.segs, filename.purity.ploidy, filename.config, filename.output, output_dir)

Arguments

filename.ssm
filename.segs
filename.purity.ploidy
filename.config
filename.output
output_dir

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.

## The function is currently defined as
function (filename.ssm, filename.segs, filename.purity.ploidy,
    filename.config, filename.output, output_dir)
{
    source(filename.config)
    message("- sourced config file")
    message("- reading in data ", filename.ssm)
    snvs <- read.snvs(filename.ssm, chrs, nucleotides = c("A",
        "C", "G", "T"))
    message(paste(filename.ssm, " - vcf read"))
    segs <- read.segs(filename.segs, chrs)
    message(paste(filename.segs, " - subclones read"))
    purity.ploidy <- read.delim(filename.purity.ploidy, sep = "\t",
        header = T, stringsAsFactors = F)
    message(paste(filename.purity.ploidy, " - purity read"))
    purity <- purity.ploidy[1, "cellularity"]
    peaks <- list(states = list(), summary = list())
    message("- computing copy-number-state-specific VAF distribution peaks")
    for (state in intersect(states, segs$state)) {
        vafs <- snvs$vaf[queryHits(findOverlaps(snvs, segs[segs$state ==
            state, ]))]
        n <- length(vafs)
        if (n > 1) {
            peaks$states[[state]] <- list()
            peaks$states[[state]]$vafs <- vafs
            peaks$states[[state]]$n <- n
            copy.numbers <- as.numeric(unlist(str_split(state,
                ":")))
            possible.multiplicities <- 1:max(copy.numbers)
            peaks$states[[state]]$expected.locs <- data.frame(state = state,
                ploidy = sum(as.numeric(unlist(str_split(state,
                  ":")))), multiplicity = possible.multiplicities,
                peak.expected = sapply(possible.multiplicities,
                  computed.expected.peak.loc, purity, sum(copy.numbers)),
                peak.matched = NA, peak.diff = NA, purity.expected = purity,
                purity.reestimated = NA, purity.diff = NA)
            peaks$states[[state]]$den <- density(peaks$states[[state]]$vafs,
                kernel = "gaussian", n = 2048)
            peaks$states[[state]]$peaks <- call.peaks(peaks$states[[state]]$den)
            peaks.avail <- peaks$states[[state]]$peaks[peaks$states[[state]]$peaks$y >
                thres.peaks.y, ]
            for (i in nrow(peaks$states[[state]]$expected.locs):1) {
                multiplicity <- peaks$states[[state]]$expected.locs$multiplicity[i]
                match.clonal <- state %in% c("1:0", "1:1") |
                  multiplicity == 2
                match.successful <- F
                while (!match.successful & nrow(peaks.avail)) {
                  if (match.clonal) {
                    best.peak <- which(peaks.avail$x == max(peaks.avail$x))
                  }
                  else {
                    peaks.avail$diff <- abs(peaks.avail$x - peaks$states[[state]]$expected.locs$peak.expected[i])
                    best.peak <- which(peaks.avail$diff == min(peaks.avail$diff))
                  }
                  peak.matched <- peaks.avail$x[best.peak]
                  purity.reestimated <- reestimate.purity(peak.matched,
                    multiplicity, peaks$states[[state]]$expected.locs$ploidy[i])
                  match.successful <- purity.reestimated < 1
                  if (!match.successful)
                    peaks.avail <- peaks.avail[-best.peak, ]
                }
                if (match.successful) {
                  peaks$states[[state]]$expected.locs$peak.matched[i] <- peak.matched
                  peaks$states[[state]]$expected.locs$purity.reestimated[i] <- purity.reestimated
                }
            }
            peaks$states[[state]]$expected.locs$peak.diff <- peaks$states[[state]]$expected.locs$peak.matched -
                peaks$states[[state]]$expected.locs$peak.expected
            peaks$states[[state]]$expected.locs$purity.diff <- peaks$states[[state]]$expected.locs$purity.reestimated -
                peaks$states[[state]]$expected.locs$purity.expected
        }
    }
    n.mutations <- sapply(peaks$states, function(i) i$n)
    if (length(n.mutations) > 0 & !all(n.mutations == 0)) {
        states.consider <- names(n.mutations)[n.mutations/sum(n.mutations) >
            thres.peak.prop.n]
        peaks$summary$expected.locs <- do.call("rbind", lapply(states.consider,
            function(state) peaks$states[[state]]$expected.locs))
        peaks$summary$expected.locs <- peaks$summary$expected.locs[!is.na(peaks$summary$expected.locs$purity.diff),
            ]
        if (nrow(peaks$summary$expected.locs) == 0)
            peaks$summary$expected.locs <- NA
    }
    else {
        peaks$summary$expected.locs <- NA
    }
    peaks$summary$purity.old <- purity
    if (is.null(nrow(peaks$summary$expected.locs))) {
        for (element in c("purity.new", "eta")) peaks$summary[[element]] <- NA
    }
    else {
        if (nrow(peaks$summary$expected.locs) > 1)
            peaks$summary$expected.locs <- peaks$summary$expected.locs[order(abs(peaks$summary$expected.locs$purity.diff),
                decreasing = T), ][-1, ]
        q <- table(peaks$summary$expected.locs$state)[peaks$summary$expected.locs$state]
        N <- sum(n.mutations[unique(as.character(peaks$summary$expected.locs$state))])
        peaks$summary$expected.locs$w <- n.mutations[as.character(peaks$summary$expected.locs$state)]/(N *
            q)
        if (abs(sum(peaks$summary$expected.locs$w) - 1) > 1e-06)
            stop("expected peak weights do not sum to 1")
        peaks$summary$purity.new <- sum(peaks$summary$expected.locs$w *
            peaks$summary$expected.locs$purity.reestimated)
        peaks$summary$eta <- abs(peaks$summary$purity.new - peaks$summary$purity.old)
    }
    message("- saving peak data to ", filename.output)
    save(peaks, file = filename.output)
    print(peaks$summary$eta)
    if (is.na(peaks$summary$eta)) {
        write.csv("FAILPEAKS", paste0(output_dir, "/FAILPEAKS"),
            quote = F, row.names = F)
    }
    else if (peaks$summary$eta > 0.05) {
        write.csv("FAILPEAKS", paste0(output_dir, "/FAILPEAKS"),
            quote = F, row.names = F)
    }
    else if (peaks$summary$eta <= 0.05) {
        write.csv("PASSPEAKS", paste0(output_dir, "/PASSPEAKS"),
            quote = F, row.names = F)
    }
    print(paste("Written indicator peak calling pass/fail file for ",
        sampledir))
  }

afrangou/CleanCNA documentation built on Dec. 28, 2021, 8:21 p.m.