R/simulatePanel.R

Defines functions simulatePanel

Documented in simulatePanel

#' Downsample a whole exome sequencing (WES) dataset to a subset only containing
#' genes targeted in the panel
#'
#' This function generates a subset of a whole exome sequencing dataset which
#' only includes the genes of a given gene panel. This subset may then be used
#' to simulate panel-based TMB quantification.
#'
#' @param WES a list generated by applyFilters or applyInputToTMB with variants,
#' filter, design and sample elements.
#' @param WES.design a \code{GRanges} object containing WES sequencing design. 
#' Used to remove off target mutations
#' @param panel.design a \code{GRanges} object containing sequencing design of
#' the panel tyou want to simuate. Used to subset WES.
#' @param assembly human genome assembly: "hg19" or "hg38"
#'
#' @return Returns a \code{GRanges} object containing variants from the
#' simulated panel, which is only those variants from WES falling in the regions
#' targeted by the panel
#'
#'
#' @author Laura Fancello
#'
simulatePanel <- function(WES, WES.design, panel.design, assembly){

    filter=WES$filter
    sample=WES$sample
    WES=WES$variants

    # Sanity Checks  -----------------------------------------------------------    
    ## Check input arguments
    if (is.null(WES)) {
        stop("argument 'WES' is missing, with no default")
    }
    if (methods::is(WES)[1] == "CollapsedVCF") {
        WES=SummarizedExperiment::rowRanges(WES)
    }else{
        if (methods::is(WES)[1] == "data.frame") {
                WES=GenomicRanges::makeGRangesFromDataFrame(WES,
                                             seqnames.field = "CHR",
                                             start.field = "START",
                                             end.field = "END",
                                             strand.field = "STRAND",
                                             keep.extra.columns = FALSE)
        }
        else{
            if(methods::is(WES)[1] != "GRanges"){
                stop("No valid object with variants from WES: please provide a GRanges
             object.")
            }
        }
    }
    if (is.null(WES.design)) {
        stop("argument 'WES.design' is missing, with no default")
    }
    if (!(methods::is(WES.design)[1] == "GRanges")) {
            stop("No valid WES design found: please provide a GRanges object containing WES
         sequencing design")
    }
    if (is.null(panel.design)) {
        stop("argument 'panel.design' is missing, with no default")
    }
    if (!(methods::is(panel.design)[1] == "GRanges")) {
        stop("No valid panel design found: please provide a GRanges object 
             containing sequencing design for the panel to simulate")
    }
    if (is.null(assembly)) {
        stop("argument 'assembly' is missing, with no default: please specify 'hg19' or 'hg38'")
    }
    if ((assembly != "hg19") && (assembly != "hg38")) {
        stop("No valid genome specified: please specify 'hg19' or 'hg38'")
    }
        
    # Preprocess input  --------------------------------------------------------
    ## Set UCSC style and allowed chromosome names.
    GenomeInfoDb::seqlevelsStyle(WES) <- "UCSC"
    keepchr <- c("chr1", "chr2", "chr3", "chr4", "chr5", "chr6",
                 "chr7", "chr8", "chr9", "chr10", "chr11", "chr12",
                 "chr13", "chr14", "chr15", "chr16", "chr17", "chr18",
                 "chr19", "chr20", "chr21", "chr22", "chrX", "chrY")
    GenomeInfoDb::seqlevels(WES, pruning.mode="coarse") <- keepchr

    # Remove off-target mutations from WES (e.g. mutations identified
    # by WES but not nincluded in sequencing design) by overlapping with GRanges
    # object with design
    WES_ok <- IRanges::subsetByOverlaps(WES, WES.design)
    
    # Simulate panel by subsetting WES  ----------------------------------------
    # Remove from WES variants not included in panel design
    simulatedPanel_gr <- IRanges::subsetByOverlaps(WES_ok, panel.design)

    # Generate output list with selected variants, filter description, object
    # containing simulated panel sequencing design, sample name
    simulatedPanel=list(variants=simulatedPanel_gr,
                        filter=filter,
                        design=panel.design,
                        sample=sample)

    return(simulatedPanel)
}
acc-bioinfo/TMBleR documentation built on Dec. 18, 2021, 10:21 p.m.