R/find_window_vpr.r

Defines functions calc_window_vpr

#' Get Windows and V(pr) for a chromosome
#'
#' @param pos Integer or Numeric vector with genomic positions within the same chromosome to segment.
#' @param vaf Numeric vector of VAFs (variant allele frequencies) for a given layer. Must correspond to sites in pos.
#' @param seg_start An integer or numeric value. Where to start segmentation? Default is 1 (the start of the chromosome).
#' @param seg_end An integer or numeric value. Where to end segmentation? Default (0) uses the maximum value of `pos`.
#' @param sensitivity A number. Penalty value for segmentation via `changepoint::cpt.meanvar`
#' @param models_cdf A numeric vector containing the cumulative density for a VAF model generated by `gen_ideal_hist()`
#' @return a DF or list with information about windows and the vafs for SNPs within each window. (TODO: flesh this doc out more)
calc_window_vpr <- function(pos, vaf, seg_start=1, seg_end=0, sensitivity=0.2,
                            bin_edges, models_cdf){
    # The authors make no representations about the suitability of this software
    # for any purpose. It is provided "as is" without express or implied warranty.

    # First, get windows using find_windows()
    win_res <- find_windows(pos=pos, vaf = vaf,
                            sensitivity = sensitivity,
                            seg_start = seg_start,
                            seg_end = seg_end)

    # compute V(pr) for each window
    fitted_vprs <- sapply(win_res$result, function(x){
        # get vpr for each layer using fit_vpr.r
        fit_vpr(x = (abs(x$vaf-0.5)+0.5), models = models_cdf, bin_edges = bin_edges)
    })

    # if : segment difference between V(pr)s in segment is less than a threshold of 0.011
    # then: combine segment with the previous one.
    d_fitted_vprs <- diff(fitted_vprs)
    idx_same_vprs <- which(abs(d_fitted_vprs) <= 0.011)

    while (idx_same_vprs != logical(0)){

        # step 1: combine segments that fail the threshold with their predecessors.
        new_wins <- lapply(idx_same_vprs, function(i)){
            new_window <- rbind(win_res$result[[i]], win_res[[i-1]])
            # update window bounds
            new_window$window_start <- min(new_window$window_start)
            new_window$window_end <- max(new_window$window_end)

            # ensure we didn't let any duplicates occur
            new_window <- new_window[!is.duplicated(new_window$pos),]
            # update window bound metadata
            new_window$window_site_length <- nrow(new_window)
            new_window$window_bp_length <- new_window$window_end - new_window$window_start
            return(new_window)
        }

        # step 2
        new_vprs <- sapply(new_wins, function(x){
            # get vpr for each layer using fit_vpr.r
            fit_vpr(x = (abs(x$vaf-0.5)+0.5), models = models_cdf, bin_edges = bin_edges)
        })

        # re-assess threshold for reducing segment size
        d_fitted_vprs <- diff(new_vprs)
        idx_same_vprs <- which(d_fitted_vprs <= 0.011)
    }

    # TODO: combine window metadata with vprs
    # TODO: combine new and old vpr (original-passed + new-combined) and return

    # return dataframe with results!
    res <- data.frame(window_start,
                      window_end,
                      window_length_sites,
                      window_length_bp,
                      Vpr)
    return(res)
}
paularstrpo/GeTalleleR documentation built on Aug. 8, 2020, 12:51 p.m.