R/shattered.regions.r

Defines functions bed2chromo.reg get.chr.bins shattered.regions shattered.eval

Documented in bed2chromo.reg get.chr.bins shattered.eval shattered.regions

#' Data class chromo.regs
#' 
#' Class to store shattered regions and information produced by shattered.regions and shattered.regions.cnv functions
#' 
#' @param regions.summary (list): a list of data.frames sumarizing the information of shattered regions found in each sample
#' @param high.density.regions (matrix): a numeric matrix representing high breakpoint density genomic bins in each sample (values 1 = high density break; 0 = normal)
#' @param high.density.regions.hc (matrix): a numeric matrix representing high breakpoint density genomic bins in each sample (values 1 = high density break; 0 = normal). 
#' Only those bins that overlap with high confidence regions defined in regions.summary are set to = 1 
#' @param cnv.brk.dens (matrix): a numeric matrix representing the number of CNV segmentation breakpoints found in at genomic bins in each sample
#' @param svc.brk.dens (matrix): a numeric matrix representing the number of SV breakpoints found at genomic bins in each sample
#' @param cnv.brk.common.dens (matrix): a numeric matrix representing the number of CNV breakpoints colocalizing SV breakpoints found at genomic bins in each sample
#' @param svc.brk.common.dens (matrix): a numeric matrix representing the number of SV breakpoints colocalizing CNV breakpoints found at genomic bins in each sample
#' @param cnvbrk (S4): on object generated by cnv.breaks function 
#' @param svcbrk (S4): on object generated by svc.breaks function 
#' @param common.brk (list): on object generated by match.breaks function 
#' @param cnv (S4) an object of class svcnvio containing data type 'cnv' validated by validate.cnv
#' @param svc (S4) an object of class svcnvio containing data type 'svc' validated by validate.svc
#' @param param (list): list of configuration parameters provided or set as default
#' @return an instance of the class 'chromo.regs' containing breakpoint mapping onto genes
#' @export
chromo.regs <- setClass("chromo.regs",
                        representation(
                            regions.summary  = "list",
                            high.density.regions = "matrix",
                            high.density.regions.hc = "matrix",
                            cnv.brk.dens = "matrix",
                            svc.brk.dens = "matrix",
                            cnv.brk.common.dens = "matrix",
                            svc.brk.common.dens = "matrix",
                            cnvbrk = 'breaks',
                            svcbrk = "breaks",
                            common.brk = "list",
                            cnv = "svcnvio",
                            svc = "svcnvio",
                            param = "list"
                        ))


setMethod("show","chromo.regs",function(object){
    
    conf <- table(names(unlist(lapply(unname(object@regions.summary),function(x) table(x[,"conf"])))))
    
    writeLines(paste("An object of class chromo.regs from svpluscnv containing the following stats:",
                "\nNumber of samples tested=",nrow(object@high.density.regions),
                "\nNumber of samples with high-breakpoint density bins=",length(which(apply(object@high.density.regions,1,sum) != 0)),
                "\nNumber of samples with high-confidence shattered regions=",conf["HC"],
               "\nNumber of samples with low-confidence shattered regions=",conf["lc"]))
})


#' Return the binary matrix containing high confidence high-breakpoint-densityregion definitions
#' @param object (chromo.regs) An object of class chromo.regs 
#' @param conf (character) Either "hc" for high confidence HBD or else include all
#' @return an instance of the class 'chromo.regs' containing breakpoint mapping onto genes
#' @export
#' @docType methods
#' @rdname hbd.mat-methods

setGeneric("hbd.mat", function(object, conf="hc") standardGeneric("hbd.mat"))

#' @rdname hbd.mat-methods
#' @export

setMethod("hbd.mat", "chromo.regs", function(object, conf="hc"){ 
    if(conf == "hc"){
      object@high.density.regions.hc
    }else{
      object@high.density.regions
    }
})

#' Return the genomicRanges object  containing the genomic bins 
#' @param object (chromo.regs) An object of class chromo.regs 
#' @return an genomicRanges object with defined genomic bins
#' @export
#' @docType methods
#' @rdname extract.bins-methods

setGeneric("extract.bins", function(object) standardGeneric("extract.bins"))

#' @rdname extract.bins-methods
#' @export

setMethod("extract.bins", "chromo.regs", function(object){
  binNames <- colnames(object@high.density.regions)
  bindf <- remove.factors(data.frame(do.call(rbind,strsplit(binNames," "))))
  bindf[,2] <- as.numeric(bindf[,2])
  bindf[,3] <- as.numeric(bindf[,3])
  colnames(bindf) <- c("chrom","start","end")
  bingr <- with(bindf,GRanges(chrom, IRanges(start=start, end=end), name=binNames))
  return(bingr)
})


#' Evaluate true catastrophic events
#' Evaluate shattered regions based on interleaved breaks and breakpoint dispersion parameters in order to identify true catastrophic chromosomal alterations
#' 
#' @param chromo.regs.obj (chromo.regs) An object of class chromo.regs 
#' @param interleaved.cut (numeric) the percentage of non interleaved structural variant calls 
#' @param dist.iqm.cut (numeric) interquantile average of the distance between breakpoints within a shattered region
#' @param verbose (logical)
#' @return an instance of the class 'chromo.regs' containing breakpoint mapping onto genes
#' @export
#' 

shattered.eval <- function(chromo.regs.obj,
                           interleaved.cut=0.5,
                           dist.iqm.cut=100000,
                           verbose=TRUE){
    
    svcdat <- chromo.regs.obj@svc@data
    cnvbrk  <- chromo.regs.obj@cnvbrk
    svcbrk <- chromo.regs.obj@svcbrk
    
    if(verbose){
        message(paste("Evaluating shattered regions in ",length(names(chromo.regs.obj@regions.summary))," samples",sep="") )
        pb <- txtProgressBar(style=3)
        cc <-0
        tot <- length(chromo.regs.obj@regions.summary)
    }
    
    for(cl in names(chromo.regs.obj@regions.summary)){
        
        if(verbose) cc <- cc+1
        if(verbose) setTxtProgressBar(pb, cc/tot)
        
        regions <-   chromo.regs.obj@regions.summary[[cl]]
        br1 <- cnvbrk@breaks[which(cnvbrk@breaks$sample == cl),2:3]
        br2 <- svcbrk@breaks[which(svcbrk@breaks$sample == cl),2:3]
        colnames(br1) <- colnames(br2) <- c("chrom","pos")
        br1.gr <- with(br1, GRanges(chrom, IRanges(start=pos, end=pos)))
        br2.gr <- with(br2, GRanges(chrom, IRanges(start=pos, end=pos)))
        
        c.br1 <- chromo.regs.obj@common.brk$brk1_match[which(chromo.regs.obj@common.brk$brk1_match$sample == cl),2:3]
        c.br2 <- chromo.regs.obj@common.brk$brk2_match[which(chromo.regs.obj@common.brk$brk2_match$sample == cl),2:3]
        colnames(c.br1) <- colnames(c.br2) <- c("chrom","pos")
        c.br1.gr <- with(c.br1, GRanges(chrom, IRanges(start=pos, end=pos)))
        c.br2.gr <- with(c.br2, GRanges(chrom, IRanges(start=pos, end=pos)))
        
        regions_gr <- with(regions, GRanges(chrom, IRanges(start=start, end=end)))
        hits_1 = suppressWarnings(GenomicAlignments::findOverlaps(regions_gr,br1.gr))
        hits_2 = suppressWarnings(GenomicAlignments::findOverlaps(regions_gr,br2.gr))
        
        c.hits_1 = suppressWarnings(GenomicAlignments::findOverlaps(regions_gr,c.br1.gr))
        c.hits_2 = suppressWarnings(GenomicAlignments::findOverlaps(regions_gr,c.br2.gr))
        
        svdat_i <- svcdat[which(svcdat$sample == cl),]
        sv_ranges_ori <-   with(svdat_i, GRanges(chrom1, IRanges(start=pos1, end=pos1)))
        sv_ranges_dest <-   with(svdat_i, GRanges(chrom2, IRanges(start=pos2, end=pos2)))
        hits_ori = suppressWarnings(GenomicAlignments::findOverlaps(regions_gr,sv_ranges_ori))
        hits_dest = suppressWarnings(GenomicAlignments::findOverlaps(regions_gr,sv_ranges_dest))
        
        # calculate interleaved score, n.breaks, dispersion, and median distance between consecutive breaks
        start <- end <- reg.size <- dist.iqm.cnv <- dist.iqm.sv <- n.brk.cnv <- n.brk.sv <- n.orth.cnv <- n.orth.sv <- interleaved <- rep(0,nrow(regions))
        conf <- rep("HC",nrow(regions))
        for(i in 1:nrow(regions)){
            sites1 <- sort(unique(br1[subjectHits(hits_1)[which(queryHits(hits_1) == i)],]$pos))
            dist.iqm.cnv[i]  <- IQM(sites1[2:length(sites1)] - sites1[1:(length(sites1)-1) ],lowQ = 0.2,upQ = 0.8)
            n.brk.cnv[i] <- length(sites1)  
            
            sites2 <- sort(unique(br2[subjectHits(hits_2)[which(queryHits(hits_2) == i)],]$pos))
            dist.iqm.sv[i]  <- IQM(sites2[2:length(sites2)] - sites2[1:(length(sites2)-1) ],lowQ = 0.2,upQ = 0.8)
            n.brk.sv[i] <- length(sites2)  
            
            c.sites1 <- sort(unique(c.br1[subjectHits(c.hits_1)[which(queryHits(c.hits_1) == i)],]$pos))
            n.orth.cnv[i] <- length(c.sites1)  
            
            c.sites2 <- sort(unique(c.br2[subjectHits(c.hits_2)[which(queryHits(c.hits_2) == i)],]$pos))
            n.orth.sv[i] <- length(c.sites2)  
            
            start[i] <- min(c(sites1,sites2))
            end[i] <- max(c(sites1,sites2))
            
            a<-svdat_i[subjectHits(hits_ori)[which(queryHits(hits_ori) == i)],]
            b<-svdat_i[subjectHits(hits_dest)[which(queryHits(hits_dest) == i)],]
            apos<-a$pos1
            names(apos) <- rownames(a)
            bpos<-b$pos2
            names(bpos) <- rownames(b)
            brkids <- names(sort(c(apos,bpos)))
            reg.size <- regions$end-regions$start
            ct<-0
            for(x in 2:length(brkids)) if(brkids[x] == brkids[x-1]) ct<-ct+1
            interleaved[i] <- ct/length(unique(brkids))
        }
        chrom <-regions[,"chrom"]
        nbins<-regions[,"nbins"]
        regions <- data.table(chrom,start,end,nbins)
        
        # Find links between shattered regions   
        if(nrow(chromo.regs.obj@regions.summary[[cl]]) > 1 ){
            record_mat<- matrix(nrow=0,ncol=2)
            links <- rep("",nrow(regions))
            for(i in 1:nrow(regions)){
                for(j in 1:nrow(regions)){
                    region_a_hits <- subjectHits(hits_ori)[which(queryHits(hits_ori) == i)]
                    region_b_hits <- subjectHits(hits_dest)[which(queryHits(hits_dest) == j)]
                    if(length(intersect(region_a_hits,region_b_hits)) > 0 ){
                        record_mat <- rbind(record_mat,c(i,j),c(j,i))
                    }
                }
            }
            for(i in 1:nrow(regions)){
                links[i] <- paste(setdiff(as.character(sort(unique(c(i, record_mat[which(record_mat[,1] == i),2]) ))),as.character(i)),collapse=",")
                if(length(grep("[0-9]",links[i])) == 0) links[i]<-"-"
            }
        }else{
            links <- "-"
        }
        if(!is.null(interleaved.cut)) conf[which(interleaved >= interleaved.cut)] <- "lc"
        if(!is.null(dist.iqm.cut)) conf[which(apply(cbind(dist.iqm.cnv,dist.iqm.sv),1,mean) < dist.iqm.cut)] <- "lc"
        conf[which(links != "-")] <- "HC"
        chromo.regs.obj@regions.summary[[cl]] <- data.table(regions,links,reg.size,dist.iqm.cnv,dist.iqm.sv,n.brk.cnv,n.brk.sv,n.orth.cnv,n.orth.sv,interleaved,conf)
    }
    if(verbose) close(pb)
    
    
    bins <- data.table(do.call(rbind,strsplit(colnames(chromo.regs.obj@high.density.regions)," ")),colnames(chromo.regs.obj@high.density.regions))
    colnames(bins) <- c("chrom","start","end","binid")
    bins$start <- as.numeric(bins$start)
    bins$end <- as.numeric(bins$end)

    binsGR <- with(bins, GRanges(chrom, IRanges(start=start, end=end)))
    highDensityRegionsHC <- chromo.regs.obj@high.density.regions
    for(cl in names(chromo.regs.obj@regions.summary)){
        lc <- chromo.regs.obj@regions.summary[[cl]][which(chromo.regs.obj@regions.summary[[cl]]$conf == "lc"),]
        if(nrow(lc) > 0){
            lcGR<- with(lc, GRanges(chrom, IRanges(start=start, end=end)))
            hits = suppressWarnings(GenomicAlignments::findOverlaps(binsGR,lcGR))
            highDensityRegionsHC[cl,bins$bins[unique(queryHits(hits)),]] <- 0
        }
    }
    chromo.regs.obj@high.density.regions.hc <- highDensityRegionsHC
    
    return(chromo.regs.obj)
}



#' Shattered region detection
#' 
#' Caller for the identification of shattered genomic regions based on CNV and SVC data
#' 
#' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv
#' @param svc (S4) an object of class svcnvio containing data type 'svc' initialized by validate.svc
#' @param fc.pct (numeric) inherited from cnv.breaks(); copy number change between 2 consecutive segments: i.e (default) cutoff = 0.2 represents a fold change of 0.8 or 1.2
#' @param min.cnv.size (numeric) inherited from cnv.breaks(); The minimun segment size (in base pairs) to include in the analysis 
#' @param min.num.probes (numeric) inherited from cnv.breaks(); The minimun number of probes per segment to include in the analysis 
#' @param low.cov (data.frame) inherited from cnv.breaks(), svc.breaks() and match.breaks; a data.frame (chr, start, end) indicating low coverage regions to exclude from the analysis
#' @param clean.brk (numeric) inherited from cnv.breaks(); n cutoff for redundant breakpoints to filter out; if NULL, no filter will be applied
#' @param window.size (numeric) size in megabases of the genmome bin to compute break density 
#' @param slide.size (numeric) size in megabases of the sliding genmome window
#' @param num.cnv.breaks (numeric) number of segmentation breakpoints per segments to be considered high-density break 
#' @param num.cnv.sd (numeric) number of standard deviations above the sample average for num.cnv.breaks
#' @param num.svc.breaks (numeric) number of svc breakpoints per segments to be considered high-density break  
#' @param num.svc.sd (numeric) number of standard deviations above the sample average for num.svc.breaks
#' @param num.common.breaks (numeric) number of common SV and segmentation breakpoints per segments to be considered high-density break
#' @param num.common.sd (numeric) number of standard deviations above the sample average for num.common.breaks
#' @param maxgap (numeric) inherited from match.breaks(); sets the maximum gap between co-localizing orthogonal breakpoints
#' @param chrlist (character) vector containing chromosomes to include in the analysis; if NULL all chromosomes available in the input will be included
#' @param chr.lim (data.frame) 3 column table (chrom, begin, end) indicating the chromosome most distal coordinates with coverage. Also returned by the function svpluscnv::chromosome.limit.coords.
#' @param interleaved.cut (numeric) 0-1 value indicating percentage of interleaved (non-contiguous) SV breakpoint pairs
#' @param dist.iqm.cut (numeric) interquantile average of the distance between breakpoints within a shattered region
#' @param verbose (logical)
#' @return an instance of the class 'chromo.regs' containing breakpoint mapping onto genes
#' @keywords chromothripsis, chromoplexy, chromosome shattering
#' @export
#' @examples
#' 
#' ## validate input data.frames
#' cnv <- validate.cnv(segdat_lung_ccle)
#' svc <- validate.svc(svdat_lung_ccle)
#' 
#' shattered.regions(cnv,svc)


shattered.regions <- function(cnv, 
                              svc,
                              fc.pct = 0.2, 
                              min.cnv.size = 0, 
                              min.num.probes=0, 
                              low.cov = NULL,
                              clean.brk=NULL,
                              window.size = 10,
                              slide.size = 2,
                              num.cnv.breaks = 6, 
                              num.cnv.sd = 5,
                              num.svc.breaks = 6, 
                              num.svc.sd = 5,
                              num.common.breaks = 3, 
                              num.common.sd = 3,
                              maxgap=10000, 
                              chrlist=NULL, 
                              chr.lim=NULL,
                              interleaved.cut = 0.33, 
                              dist.iqm.cut = 1e+05, 
                              verbose=TRUE){
  
    stopifnot(cnv@type == "cnv")
    cnvdat <- cnv@data
    
    stopifnot(svc@type == "svc")
    svcdat <- svc@data
    
    if(is.null(chr.lim)){
      chr.lim <- chromosome.limit.coords(cnv)
    }else{
      stopifnot(ncol(chr.lim) == 3)   
    }
    if(!is.null(chrlist)){
      chr.lim <- chr.lim[which(chr.lim$chrom %in% chrlist)]
    }
    
  if(verbose) message("Finding 'cnv' breakpoints")
  cnvbrk <- cnv.breaks(cnv = cnv, 
                       fc.pct = fc.pct, 
                       min.cnv.size = min.cnv.size,
                       min.num.probes = min.num.probes,
                       low.cov = low.cov, 
                       clean.brk = clean.brk,
                       chrlist=chrlist,
                       verbose = verbose)
  
  if(verbose) message("Finding 'svc' breakpoints")
  svcbrk <- svc.breaks(svc = svc, 
                       chrlist=chrlist,
                       low.cov = low.cov)
  
  if(verbose) message("Matching 'cnv' and 'svc' breakpoints")
  common.brk <- match.breaks(cnvbrk,svcbrk,
                             maxgap = maxgap, 
                             verbose = verbose,
                             plot=FALSE)
  
  if(is.null(chrlist)) chrlist <- unique(c(cnvdat$chrom,svcdat$chrom1,svcdat$chrom2))
  chrlist <- chr.sort(chrlist)
  
  if(verbose) message("Mapping 'cnv' breakpoints across the genome:")
  cnvbrk.dens <- break.density(cnvbrk, chr.lim = chr.lim, window.size = window.size, slide.size = slide.size, verbose = verbose)

  if(verbose) message("Mapping 'svc' breakpoints across the genome:")
  svcbrk.dens <- break.density(svcbrk,chr.lim = chr.lim, window.size = window.size, slide.size = slide.size,verbose = verbose)
    
    burden1 <- common.brk$restab$matched.brk1
    burden2 <- common.brk$restab$matched.brk2
    names(burden1) <- names(burden2)  <- common.brk$restab$sample
    
  common.brk1 <- breaks(breaks = common.brk$brk1_match,
                      burden=burden1,
                      param=list(datatype="combined"))
  
  common.brk2 <- breaks(breaks = common.brk$brk2_match,
                       burden = burden2,
                       param=list(datatype="combined"))

  if(verbose) message("Mapping 'cnv' validated breakpoints across the genome:")
  cnvbrk.common.dens <- break.density(common.brk1, chr.lim = chr.lim, window.size = window.size, slide.size = slide.size,verbose = verbose)
  
  if(verbose) message("Mapping 'svc' validated breakpoints across the genome:")
  svcbrk.common.dens <- break.density(common.brk2,chr.lim = chr.lim, window.size = window.size, slide.size = slide.size,verbose = verbose)
  
  commonSamples <- intersect(names(cnvbrk@burden),names(svcbrk@burden))
  if(length(commonSamples) == 0) stop("There is no common samples between 'cnv' and 'svc' input datasets.") 
  
  # calculate inter quantile mean and standard deviation per sample
  iqmdata1<- rep(0,length(commonSamples))
  names(iqmdata1) <- commonSamples
  
  sddata1 <- iqmdata2 <- sddata2<- iqmdata3<- sddata3 <- iqmdata1
  iqmdata1[names(apply(cnvbrk.dens,1,IQM,lowQ=.1,upQ=.9))] <- apply(cnvbrk.dens,1,IQM,lowQ=.1,upQ=.9)
  sddata1[names(apply(cnvbrk.dens,1,IQSD,lowQ=.1,upQ=.9))]  <- apply(cnvbrk.dens,1,IQSD,lowQ=.1,upQ=.9)
  iqmdata2[names(apply(svcbrk.dens,1,IQM,lowQ=.1,upQ=.9))] <- apply(svcbrk.dens,1,IQM,lowQ=.1,upQ=.9)
  sddata2[names(apply(svcbrk.dens,1,IQSD,lowQ=.1,upQ=.9))]  <- apply(svcbrk.dens,1,IQSD,lowQ=.1,upQ=.9)
  iqmdata3[names(apply(cnvbrk.common.dens,1,IQM,lowQ=.1,upQ=.9))] <- apply(cnvbrk.common.dens,1,IQM,lowQ=.1,upQ=.9)
  sddata3[names(apply(cnvbrk.common.dens,1,IQSD,lowQ=.1,upQ=.9))]  <- apply(cnvbrk.common.dens,1,IQSD,lowQ=.1,upQ=.9)
  
  
  a <- sapply(commonSamples, function(i) names(which(cnvbrk.dens[i,] > iqmdata1[i]+num.cnv.sd*sddata1[i] )),simplify=FALSE)
  b <- sapply(commonSamples, function(i) names(which(cnvbrk.dens[i,] >= num.cnv.breaks)),simplify=FALSE)
  c <- sapply(commonSamples, function(i) names(which(svcbrk.dens[i,] > iqmdata2[i]+num.svc.sd*sddata2[i] )),simplify=FALSE)
  d <- sapply(commonSamples, function(i) names(which(svcbrk.dens[i,] >= num.svc.breaks)),simplify=FALSE)
  e <- sapply(commonSamples, function(i) names(which(cnvbrk.common.dens[i,] > iqmdata3[i]+num.common.sd*sddata3[i] )),simplify=FALSE)
  f <- sapply(commonSamples, function(i) names(which(cnvbrk.common.dens[i,] >= num.common.breaks)),simplify=FALSE)
  
  
  # condition for chromothripsis: at least n=breaks > 6 (svc SND cnv)  AND n-breaks > u+2*sd (svc AND cnv) 
  res <- sapply(commonSamples,function(i) Reduce(intersect, list(a[[i]],b[[i]],c[[i]],d[[i]],e[[i]],f[[i]])), simplify=FALSE)
  
  highDensityRegions <- rbind(cnvbrk.dens[commonSamples,])
  if(length(commonSamples == 1)) rownames(highDensityRegions) <- commonSamples
  highDensityRegions[] <- 0
  for(cl in commonSamples) highDensityRegions[cl,res[[cl]]] <- 1
  
  #res <- res[which(unlist(lapply(res,length)) >0)]
  if(verbose){
    message("Locating shattered regions...")
    pb <- txtProgressBar(style=3)
    cc <-0
    tot <- length(res)
  }
  
  restab  <- list()
  for(cl in names(res)){
    if(verbose) cc <- cc+1
    if(verbose) setTxtProgressBar(pb, cc/tot)
    if(length(res[[cl]]) > 0){
      tab <- data.table(do.call(rbind,strsplit(res[[cl]]," ")))
      colnames(tab) <- c("chrom","start","end")
      tab$start <- as.numeric(tab$start )
      tab$end <- as.numeric(tab$end )
      
      tabgr = with(tab, GRanges(chrom, IRanges(start=start, end=end))) 
      hits = as.data.frame(suppressWarnings(GenomicAlignments::findOverlaps(tabgr,tabgr)))
      
      agg <- aggregate(subjectHits ~ queryHits, hits, paste,simplify=FALSE)
      prev<-c(); cnum <- 0
      agglist <- list()
      for(x in agg$subjectHits){
        if(length(intersect(x,prev) > 0)){
          agglist[[cnum]] <- unique(c(x,prev))
          prev <- agglist[[cnum]]
        }else{
          cnum <- cnum+1
          agglist[[cnum]]<- x
          prev <-agglist[[cnum]]
        }
      }
      agglistUniq <- list()
      for(i in 1:length(agglist)){
        chrom <- as.character(unique(tab[as.numeric(agglist[[i]]),"chrom"]))
        start<-min( tab[as.numeric(agglist[[i]]),"start"])
        end<-max( tab[as.numeric(agglist[[i]]),"end"])
        nbins <- length(agglist[[i]])
        conf <- "lc"
        agglistUniq[[i]] <-  data.table(chrom,start,end,nbins,conf)
      }
      tabmerged <- data.table(do.call(rbind,agglistUniq))
      restab[[cl]] <- tabmerged
    }
  }
  if(verbose) close(pb)
  
  results <- chromo.regs(
    regions.summary = restab,
    high.density.regions = highDensityRegions,
    high.density.regions.hc = matrix(),
    cnv.brk.dens = cnvbrk.dens,
    svc.brk.dens = svcbrk.dens,
    cnv.brk.common.dens = cnvbrk.common.dens,
    svc.brk.common.dens = svcbrk.common.dens,
    cnvbrk = cnvbrk,
    svcbrk = svcbrk,
    common.brk = common.brk,
    cnv = cnv,
    svc = svc,
    param = list(fc.pct = fc.pct,
                  min.cnv.size = min.cnv.size, 
                  min.num.probes=min.num.probes, 
                  low.cov = low.cov,
                  clean.brk=clean.brk,
                  window.size = window.size,
                  slide.size = slide.size, 
                  num.cnv.breaks = num.cnv.breaks,
                  num.cnv.sd = num.cnv.sd,
                  num.svc.breaks = num.svc.breaks,
                  num.svc.sd = num.svc.sd, 
                  num.common.breaks = num.common.breaks, 
                  num.common.sd = num.common.sd,
                  maxgap=maxgap,
                  chrlist=chrlist, 
                  interleaved.cut = interleaved.cut,
                  dist.iqm.cut = dist.iqm.cut)
        )
  
  results <- shattered.eval(results, interleaved.cut = interleaved.cut, dist.iqm.cut = dist.iqm.cut, verbose = verbose)
  
  return(results)
}

#' Generates a GenomicRanges objact containing genomic bins based on a given bin size. If a cnv (svcnvio) object is provided the chromosome limits 
#' will be obtaind from mapped regions, otherwise chromosome limits will be obtained from the database (D3GB)
#'  
#' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv
#' @param genome.v (hg19 or hg38) reference genome version to generate genoic bins (ignored if cnv is not NULL)
#' @param window.size (numeric) size in megabases to generate genomic bins
#' @param slide.size (numeric) size in megabases of the sliding genomic window; slide.size must be <= 1
#' @return an instance of the class 'chromo.regs' containing information about shattered regions
#' @export
#' 

get.chr.bins <- function(cnv = NULL, genome.v= "hg19", window.size=10, slide.size=2){
  WS <- window.size * 1e+6
  SS <- slide.size * 1e+6
  offset <- round(window.size/slide.size)
  
  stopifnot(WS >= SS)
  
  if(is.null(cnv)){
    chr.lim<- d3gb.chr.lim(genome.v=genome.v)
  }else{
    chr.lim<- chromosome.limit.coords(cnv)
  }
  
  df.list<-list()
  for(chr in unique(chr.lim$chrom)){
    binlimits <- seq(chr.lim$begin[which(chr.lim$chrom == chr)], chr.lim$end[which(chr.lim$chrom == chr)],SS)
    chrom <- rep(chr, length(binlimits)-offset)
    Begin <- binlimits[1:(length(binlimits)-offset)]+1
    End <- binlimits[(offset+1):length(binlimits)]
    Name <-  paste(chrom,Begin,End,sep=" ")
    df.list[[chr]] <- data.frame(chrom,Begin,End,Name)
  }
  bingr <- with(do.call(rbind,df.list), GRanges(chrom, IRanges(start=Begin, end=End),name=Name))
  return(bingr)
}


#' Transforms a bed format data.frame containing genomic regions into a matrix of n samples versus m defined genomic bins where bins overlapping with bed segments take value = 1
#'  
#' @param bed (data.frame) An data.frame
#' @param bingr (S4) a GenomicRanges object containing the 
#' @param genome.v (hg19 or hg38) reference genome version to generate genoic bins (ignored if bingr is not NULL)
#' @param window.size (numeric) size in megabases to generate genomic bins
#' @param slide.size (numeric) size in megabases of the sliding genomic window; slide.size must be <= 1
#' @return an instance of the class 'chromo.regs' containing information about shattered regions
#' @export
#' 

bed2chromo.reg <- function(bed, bingr=NULL, genome.v="hg19", window.size=10, slide.size=2){
  
  if(!is.null(bingr)){
    stopifnot(isS4(bingr))
  }else{
    bingr <- get.chr.bins(cnv=NULL,genome.v="hg19", window.size=window.size, slide.size=slide.size)
  }
  
  inbedGR <- with(bed,GRanges(chrom, IRanges(start=start, end=end)))
  idNames <- unique(bed$sampleid)
  tot <- length(unique(bed$sampleid))*length(binsgr$name)
  hbd <- matrix(rep(0,),ncol=length(binsgr$name),nrow=length(idNames))
  colnames(hbd) <- bingr$name
  rownames(hbd) <- idNames
  
  hits <- GenomicAlignments::findOverlaps(bingr,inbedGR)
  
  for(i in idNames){
    hbdbins_i <- bingr$name[queryHits(hits)[which(bed$sampleid[subjectHits(hits)] == i)]]
    hbd[i,hbdbins_i] <- 1
  }
  return(hbd)
}
ccbiolab/svpluscnv documentation built on Sept. 9, 2020, 4:52 a.m.