R/ngscopy.R

Defines functions read_regions parse_segmtype help_segmtype df.to.gr

Documented in df.to.gr help_segmtype parse_segmtype read_regions

## ************************************************************************
##
##
##
## (c) Xiaobei Zhao
##
## Tue May 20 16:01:39 EDT 2014 -0400 (Week 20)
##
##
## Reference:
## require(parallel)
##
##
## ************************************************************************



##' Detection of copy number variations in next generation sequencing (NGS)
##'
##' 
##' @title Detection of copy number variations in next generation sequencing (NGS)
##' @description
##' NGScopy is a reference class to detect copy number variations by ``restriction-imposed windowing'' in next generation sequencing (NGS).
##' 
##' @author Xiaobei Zhao
##' @field inFpathN character, 
##' the file path to the control (normal) sample
##' @field inFpathT character, 
##' the file path to the case (tumor) sample
##' @field outFpre character, 
##' the file path of the directory for output. 
##' @field libsizeN numeric, the library size of the control (normal) sample.
##' @field libsizeT numeric, the library size of the case (tumor) sample.
##' @field mindepth numeric, the minimal depth of reads per window.
##' @field minsize numeric, the minimal size of a window.
##' @field regions data.frame of three columns (chr/start/end),
##' the regions to study.
##' It follows the BED format: zero-based, half-open; (start,end].
##' @field segtype character, the type of change to capture during segmentation,
##' mean and/or variance, normal or nonparametric distributions.
##' A character vector with a single or multiple values from
##' c("mean.norm","meanvar.norm","mean.cusum","var.css"). 
##' see \code{changepoint}.
##' @field dsN integer, the downsampling factor of the control (normal) sample.
##' @field dsT integer, the downsampling factor of the case (tumor) sample.
##' @field MoreArgs.cn list, a list of arguments for method `calc_cn'.
##' See \code{set_MoreArgs.cn}.
##' @field MoreArgs.segm list, a list of arguments for method `calc_segm'.
##' See \code{set_MoreArgs.segm}.
##' @field pcThreads integer,
##' the number of processors performing the parallel computing.
##' @field auto.save logical, whether to save (any completed results)
##' automatically.
##' @field auto.load logical, whether to load (any previously completed results)
##' automatically.
##' @field force.rerun character, the names of methods to rerun regardless of
##' any previous runs, default to c().
##' @field out list, the output.
##' 
##' @examples
##' require(NGScopy)
##' require(NGScopyData)
##' 
##' ## Create an instance of `NGScopy' class
##' obj <- NGScopy$new(
##'   outFpre="ngscopy-case1",         # specified directory for output
##'   inFpathN=tps_N8.chr6()$bamFpath, # normal sample: tps_90.chr6.sort.bam
##'   inFpathT=tps_90.chr6()$bamFpath, # tumor sample:  tps_N8.chr6.sort.bam
##'   libsizeN=5777087,                # the library size of the normal sample
##'   libsizeT=4624267,                # the library size of the tumor sample
##'   mindepth=20,                     # the minimal depth of reads per window
##'   minsize=20000,                   # the minimal size of a window
##'   pcThreads=1                      # the number of processors for computing
##'   )
##' 
##' obj$show()                         # print the instance
##' 
##' \dontrun{
##'
##' ## Compute the copy number and save it
##' ## A data.frame will be saved to file `ngscopy_cn.txt' in the output directory
##' obj$write_cn()
##' 
##' ## Compute the segmentation and save it
##' ## A data.frame will be saved to file `ngscopy_segm.txt' in the output directory
##' obj$write_segm()
##' 
##' ## Visualization
##' ## A figure will be saved to file `ngscopy_out.pdf' in the output directory
##' obj$plot_out()
##' }
##' 
##' @seealso \code{NGScopyData}
##' ##@exportClass NGScopy
NGScopy <- 
  setRefClass(
    'NGScopy',
    list(
      inFpathN='character',
      inFpathT='character',
      outFpre='character',
      libsizeN='numeric',
      libsizeT='numeric',
      mindepth='numeric',
      minsize='numeric',
      regions='data.frame',
      segtype='character',
      dsN='integer',
      dsT='integer',
      MoreArgs.cn='list',
      MoreArgs.segm='list',
      pcThreads='integer',
      auto.save='logical',
      auto.load='logical',
      force.rerun='character',
      out='list'
      ),
    contains='xRefClass',
    methods=list(
      initialize=function(...){
        .idx <- c(outFpre=1)
        callSuper(...,.index=.idx)
        setme()
      },
      setme=function(...){
        ##
        if (!length(auto.save)){
          auto.save <<- FALSE
          stampmsg(sprintf(
            'auto.save not specified, using a default of %s.',auto.save))
        }
        message('auto.save: ',.self$auto.save)
        
        ##
        if (!length(auto.load)){
          auto.load <<- FALSE
          stampmsg(sprintf(
            'auto.load not specified, using a default of %s.',auto.load))
        }
        message('auto.load: ',.self$auto.load)
        
        ## 
        if (length(inFpathN)){
          .set_inFpathN()
        }
        if (length(inFpathT)){
          .set_inFpathT()
        }
        if (length(outFpre)){
          .set_outFpre()
        }
        if (length(libsizeN)){
          .set_libsizeN()
        }
        if (length(libsizeT)){
          .set_libsizeT()
        }
        if (length(mindepth)){
          .set_mindepth()
        }
        if (length(minsize)){
          .set_minsize()
        }
        if (length(regions)){
          .set_regions()
        }
        if (length(segtype)){
          .set_segmtype()
        }
        if (length(dsN)){
          .set_dsN()
        }
        if (length(dsT)){
          .set_dsT()
        }
        if (length(MoreArgs.cn)){
          .set_MoreArgs.cn()
        }
        if (length(MoreArgs.segm)){
          .set_MoreArgs.segm()
        }
        if (length(force.rerun)){
          .set_force.rerun()
        }
        if (length(pcThreads)){
          .set_pcThreads()
        }
        
        ##
        invisible()        
      },
      .set_inFpathN=function(){
        'For internal use only.'
        stampmsg("set_normal | inFpathN")
        if (!length(.self$inFpathN)){
          stop('inFpathN must be specified.')
        }
        if (!is.file(.self$inFpathN) | !is.file(paste(.self$inFpathN,'.bai',sep=''))){
          stop('inFpathN and its index file (.bai) must be valid.')          
        }
        out$inFpathN <<- .self$inFpathN
        ##
        message('inFpathN: ',.self$inFpathN)
        message('')
        invisible()
      },
      .set_mindepth=function(){
        'For internal use only.'
        stampmsg("set_normal | mindepth")
        if (!length(.self$mindepth)){
          message('mindepth not specified, using a default of 20.')
          .self$mindepth <- 20
        }
        out$mindepth <<- mindepth
        ##
        .scipen <- options()$scipen
        options(scipen=9)
        message('mindepth: ',.self$mindepth) 
        options(scipen=.scipen)    
        message('')   
        invisible()
      },
      .set_minsize=function(){
        'For internal use only.'
        stampmsg("set_normal | minsize")
        if (!length(.self$minsize)){
          message('minsize not specified, using a default of 20000.')
          .self$minsize <- 20000
        }
        out$minsize <<- minsize
        ##
        .scipen <- options()$scipen
        options(scipen=9)
        message('minsize: ',.self$minsize)
        options(scipen=.scipen)
        message('')   
        invisible()
      },
      set_inFpathN=function(inFpathN){
        'Set a control (normal) sample. inFpathN: The file path to the control (normal) sample.'
        if (!missing(inFpathN)){
          if(length(.self$inFpathN)){
            stop('set_inFpathN | inFpathN must be set only once.')
          }
          .self$inFpathN <- inFpathN
          .set_inFpathN()
        }
        invisible()        
      },
      set_mindepth=function(mindepth){
        'Set the minimal depth per window. mindepth: the minimal depth of reads per window in the control (normal) sample.'
        if (!missing(mindepth)) {
          if(length(.self$mindepth)){
            stop('set_mindepth | mindepth must be set only once.')
          } 
          .self$mindepth <- mindepth
          .set_mindepth()
        }
        invisible()
      },
      set_minsize=function(minsize){
        'Set the minimal size per window. minsize: the minimal size of a window in the control (normal) sample.'
        if (!missing(minsize)) {
          if(length(.self$minsize)){
            stop('set_minsize | minsize must be set only once.')
          }
          .self$minsize <- minsize
          .set_minsize()
        }
        invisible()
      },
      .set_normal=function(){
        'For internal use only.'
        .set_inFpathN(inFpathN)
        .set_mindepth(mindepth)
        .set_minsize(minsize)
        invisible()
      },
      set_normal=function(inFpathN,mindepth,minsize){
        'Set a control (normal) sample and minimal depth/size per window. See `set_inFpathN\', `set_mindepth\', `set_minsize\'.'
        set_inFpathN(inFpathN)
        set_mindepth(mindepth)
        set_minsize(minsize)
        invisible()
      },
      .set_inFpathT=function(){
        'For internal use only.'
        stampmsg("set_tumor | inFpathT")
        if (!length(.self$inFpathT)){
          stop('inFpathT must be specified.')
        }
        if (!is.file(.self$inFpathT) | !is.file(paste(.self$inFpathT,'.bai',sep=''))){
          stop('inFpathT and its index file (.bai) must be valid.')          
        }
        out$inFpathT <<- .self$inFpathT
        ##
        message('inFpathT: ',.self$inFpathT)
        message('')
        invisible()
      },
      set_inFpathT=function(inFpathT){
        'Set a case (tumor) sample. inFpathT: The file path to the case (tumor) sample.'
        if (!missing(inFpathT)){
          if(length(.self$inFpathT)){
            stop('set_inFpathT | inFpathT must be set only once.')
          } 
          .self$inFpathT <- inFpathT
          .set_inFpathT()
        }
        invisible()
      },
      set_tumor=function(inFpathT){
        'Set a case (tumor) sample. See `set_inFpathT\'.'
        set_inFpathT(inFpathT)
        invisible()
      },
      .set_outFpre=function(){
        'For internal use only.'
        stampmsg("set_outFpre")
        if (!length(.self$outFpre)){
          stop('set_outFpre | outFpre must be specified.')
        }
        ## out$outFpre <<- sprintf("%s.d%s_w%s",outFpre,mindepth,minsize)
        out$outFpre <<- .self$outFpre
        make.dir(out$outFpre)
        out$outFpath <<- sprintf("%s/ngscopy_out.RData",out$outFpre)
        ## out$objFpath <<- sprintf("%s/ngscopy_obj.RData",out$outFpre)
        message('outFpre: ',.self$outFpre)
        message('')
        invisible()        
      },
      set_outFpre=function(outFpre){
        'Set a directory for output. outFpre: the file path of the directory for output.'
        if (!missing(outFpre)) {
          if(length(.self$outFpre)){
            stop('set_outFpre | outFpre must be set only once.')
          }
          .self$outFpre <- outFpre
          .set_outFpre()
        }
        invisible()
      },
      .set_libsizeN=function(){
        'For internal use only.'
        stampmsg("set_libsize | libsizeN")
        if (!length(.self$libsizeN)){
          message('libsizeN not specified, assuming 10**6.')
          .self$libsizeN <- 10**6
        }
        out$libsizeN <<- libsizeN
        ##
        message('libsizeN: ',.self$libsizeN)
        message('')
        invisible()        
      },
      .set_libsizeT=function(){
        'For internal use only.'
        stampmsg("set_libsize | libsizeT")
        if (!length(.self$libsizeT)){
          message('libsizeT not specified, assuming 10**6.')
          .self$libsizeT <- 10**6
        }        
        out$libsizeT <<- libsizeT
        ##
        message('libsizeT: ',.self$libsizeT)
        message('')
        invisible()        
      },
      set_libsizeN=function(libsizeN){
        'Set library size of the control (normal). libsizeN: numeric, the library size of the control (normal) sample.'
        if (!missing(libsizeN)) {
          if(length(.self$libsizeN)){
            stop('set_libsizeN | libsizeN must be set only once.')
          } 
          .self$libsizeN <- libsizeN
          .set_libsizeN()
        }
        invisible()
      },
      set_libsizeT=function(libsizeT){
        'Set library size of the case (tumor). libsizeT: numeric, the library size of the case (tumor) sample.'
        if (!missing(libsizeT)) {
          if(length(.self$libsizeT)){
            stop('set_libsizeT | libsizeT must be set only once.')
          } 
          .self$libsizeT <- libsizeT
          .set_libsizeT()
        }
        invisible()
      },
      .set_libsize=function(){
        'For internal use only.'
        .set_libsizeN(libsizeN)
        .set_libsizeT(libsizeT)
        invisible()        
      },
      set_libsize=function(libsizeN,libsizeT){
        'Set library sizes. See `set_libsizeN\', `set_libsizeT\'.'
        set_libsizeN(libsizeN)
        set_libsizeT(libsizeT)
        invisible()
      },
      .set_regions=function(){
        'For internal use only.'
        stampmsg("set_regions")
        if(nrow(.self$regions)){
          if (! 'refid' %in% .self$regions){
            .refname <- unique(.self$regions[,'chr'])
            .refname0 <- unique(.ref_to_refname(.get_ref()))
            if (! all(.refname %in% .refname0) ){
              stop(sprintf(
                '.set_regions | inconsistency in chromosome names: the input (%s) should be all or a subset of the reference genome (%s).',
                vconcat(.refname,quote=TRUE),
                vconcat(.refname0,quote=TRUE)
                ))
            }
            .self$regions <- .refname_to_refid(.self$regions,.get_ref(),'chr')
          } 
        }else{
          .self$regions <- .ref_to_regions(.get_ref())
        }
        
        ## logme(.self$regions)
        .self$regions <-
          .trim_regions(.sort_regions(.self$regions,.get_ref()),.get_ref())
        out$regions <<- .self$regions
        ##
        .scipen <- options()$scipen
        options(scipen=9)
        message('regions: \n',dfconcat(.refid_to_refname(
          head(.self$regions),.get_ref(),refname='chr'
          )))
        options(scipen=.scipen)    
        if (nrow(.self$regions)>6){
          message('... ...')
        }
        message('')
      },
      set_regions=function(regions){
        'Set regions. regions: the regions in study, matrix, data.frame, character, file or connection with three columns (chr/start/end).'
        if(!missing(regions)){
          if (nrow(.self$regions)){
            stop('set_regions | regions must be set only once.')
          }
          .regions <- read_regions(regions)
          .self$regions <- .regions
          .set_regions()
        } 
      },
      .set_segmtype=function(){
        'For internal use only.'
        stampmsg("set_segmtype")
        .self$segtype <- .check_segmtype(.self$segtype)
        if (!length(.self$segtype)){
          ##.default.segmtype <- .parse_segmtype()
          ##.default.segmtype <- "mean.cusum"
          .default.segmtype <- "mean.norm"
          message(sprintf(
            'segtype not specified, using a default of %s.',
            vconcat(.default.segmtype,capsule=TRUE,quote=TRUE)))
          .self$segtype <- .default.segmtype
        }
        out$segtype <<- .self$segtype
        ##
        message('segtype: ',vconcat(.self$segtype,capsule=TRUE,quote=TRUE))
        message('')
        invisible()
      },
      set_segmtype=function(segtype){
        'Set the type of segmentation. segtype: a character vector with a single or multiple values from c("mean.norm","meanvar.norm","mean.cusum","var.css"). See `changepoint\'.'
        if (!missing(segtype)){
          if(length(.self$segtype)){
            message(sprintf(
              'set_segmtype | segtype (%s) is overridden by (%s).',
              vconcat(.self$segtype,quote=TRUE),
              vconcat(segtype,quote=TRUE)
              ))
          }
          .self$segtype <- segtype
          .set_segmtype()
        }
        invisible()
      },
      .set_dsN=function(){
        'For internal use only.'
        stampmsg("set_ds | dsN")
        if (!length(.self$dsN)){
          message('dsN not specified, assuming 1.')
          .self$dsN <- as.integer(1)
        }
        if (.self$dsN < 1) {
          stop('dsN must be an integer no less than 1.')
        }
        out$dsN <<- .self$dsN
        ##
        message('dsN: ',.self$dsN)
        message('')
        invisible()        
      },
      set_dsN=function(dsN){
        'Set downsampling factor of the control (normal). dsN: numeric, the library size of the control (normal) sample.'
        if (!missing(dsN)) {
          if(length(.self$dsN)){
            stop('set_dsN | dsN must be set only once.')
          }
          .self$dsN <- as.integer(dsN)
          .set_dsN()
        }
        invisible()
      },
      .set_dsT=function(){
        'For internal use only.'
        stampmsg("set_ds | dsT")
        if (!length(.self$dsT)){
          message('dsT not specified, assuming 1.')
          .self$dsT <- as.integer(1)
        }        
        if (.self$dsT < 1) {
          stop('dsT must be an integer no less than 1.')
        }
        out$dsT <<- .self$dsT
        ##
        message('dsT: ',.self$dsT)
        message('')
        invisible()        
      },
      set_dsT=function(dsT){
        'Set downsampling factor of the case (tumor). dsT: numeric, the library size of the case (tumor) sample.'
        if (!missing(dsT)) {
          if(length(.self$dsT)){
            stop('set_dsT | dsT must be set only once.')
          }
          .self$dsT <- as.integer(dsT)
          .set_dsT()
        }
        invisible()
      },
      set_ds=function(dsN,dsT){
        'Set downsampling factors. See `set_dsN\', `set_dsT\'.'
        set_dsN(dsN)
        set_dsT(dsT)
        invisible()
      },
      .set_MoreArgs.cn=function(){
        'For internal use only.'
        stampmsg("set_MoreArgs.cn")
        if (!length(.self$MoreArgs.cn)){
          message('MoreArgs.cn not specified, assuming list(pseudocount=1,logr=TRUE).')
          .self$MoreArgs.cn <- list(pseudocount=1,logr=TRUE)
        }
        out$MoreArgs.cn <<- MoreArgs.cn
        ##
        message(cat('MoreArgs.cn: '))
        message(str(.self$MoreArgs.cn))
        message('')
        invisible()        
      },
      set_MoreArgs.cn=function(...){
        'Set MoreArgs.cn. ..., (pseudocount: the pseudocounts added to the observed depth per window, default to 1; logr: logical, whether to make log2 transformation of the ratios, default to TRUE.)'
        .MoreArgs.cn <- list(...)
        if (length(.MoreArgs.cn)) {
          if(length(.self$MoreArgs.cn)){
            message('set_MoreArgs.cn | MoreArgs.cn is updated.')
          }
          ##.self$MoreArgs.cn <- .MoreArgs.cn
          ##.self$MoreArgs.cn <- utils::modifyList(.self$MoreArgs.cn,.MoreArgs.cn)
          .self$MoreArgs.cn <-
            utils::modifyList(list(pseudocount=1,logr=TRUE),.MoreArgs.cn)
          .set_MoreArgs.cn()
        }
        invisible()
      },
      .set_MoreArgs.segm=function(){
        'For internal use only.'
        stampmsg("set_MoreArgs.segm")
        if (!length(.self$MoreArgs.segm)){
          message('MoreArgs.segm not specified, assuming list().')
          .self$MoreArgs.segm <- list()
          ## .self$MoreArgs.segm <- list(maxnseg=10)
        }
        out$MoreArgs.segm <<- MoreArgs.segm
        ##
        message(cat('MoreArgs.segm: '))
        message(str(.self$MoreArgs.segm))
        message('')
        invisible()        
      },
      set_MoreArgs.segm=function(...){
        'Set MoreArgs.segm. ..., a list of other arguments to the funciton of segmentation given by segtype. See `help_segmtype\'.'
        .MoreArgs.segm <- list(...)
        if (length(.MoreArgs.segm)) {
          if(length(.self$MoreArgs.segm)){
            message('set_MoreArgs.segm | MoreArgs.segm is updated.')
          }
          ##.self$MoreArgs.segm <- .MoreArgs.segm
          ##.self$MoreArgs.segm <- utils::modifyList(.self$MoreArgs.segm,.MoreArgs.segm)
          .self$MoreArgs.segm <- .MoreArgs.segm
          .set_MoreArgs.segm()
        }
        invisible()
      },
      .set_pcThreads=function(){
        'For internal use only.'
        stampmsg("set_pcThreads")
        if (!length(.self$pcThreads)){
          message('pcThreads not specified, assuming 1.')
          .self$pcThreads <- as.integer(1)
        }
        out$pcThreads <<- pcThreads
        ##
        message('pcThreads: ',.self$pcThreads)
        message('')
        invisible()        
      },
      set_pcThreads=function(pcThreads){
        'Set the number of processors. pcThreads: numeric, the number of processors performing the parallel computing. It should not exceed the system\'s capacity.'
        if (!missing(pcThreads)) {
          if(length(.self$pcThreads)){
            message(sprintf(
              'set_pcThreads | pcThreads (%s) overrides a previous value (%s).',
              pcThreads,
              .self$pcThreads
              ))
          }
          .self$pcThreads <- pcThreads
          .set_pcThreads()
        }
        invisible()
      },
      .set_force.rerun=function(){
        'For internal use only.'
        stampmsg("set_force.rerun")
        if (!length(.self$force.rerun)){
          message('force.rerun not specified, assuming c("").')
          .self$force.rerun <- c("")
        }
        out$force.rerun <<- force.rerun
        ##
        message('force.rerun: ',vconcat(.self$force.rerun,capsule=TRUE,quote=TRUE))
        message('')
        invisible()        
      },
      set_force.rerun=function(force.rerun){
        'Set force.rerun. Reset it with missing input.'
        if (!missing(force.rerun)) {
          .force.rerun <- force.rerun
          if (!length(.force.rerun)){
            .force.rerun <- c("")
          }
          if(length(.self$force.rerun)){
            message(sprintf(
              'set_force.rerun | force.rerun (%s) overrides a previous value (%s).',
              vconcat(.force.rerun,capsule=FALSE,quote=TRUE),
              vconcat(.self$force.rerun,capsule=FALSE,quote=TRUE)
              ))
          } 
        } else{
          .force.rerun <- c("")
        }
        .self$force.rerun <- .force.rerun
        .set_force.rerun()
        invisible()
      },
      get_inFpathN=function(){
        'Get inFpathN'
        if (!.loadme('inFpathN')){
          .set_inFpathN()
        }
        .self$inFpathN
      },
      get_inFpathT=function(){
        'Get inFpathT'
        if (!.loadme('inFpathT')){
          .set_inFpathT()
        }
        .self$inFpathT
      },
      get_outFpre=function(){
        'Get outFpre'
        if (!.loadme('outFpre')){
          .set_outFpre()
        }
        .self$outFpre
      },
      get_libsizeN=function(){
        'Get libsizeN'
        if (!.loadme('libsizeN')){
          .set_libsizeN()
        }
        .self$libsizeN
      },
      get_libsizeT=function(){
        'Get libsizeT'
        if (!.loadme('libsizeT')){
          .set_libsizeT()
        }
        .self$libsizeT
      },
      get_mindepth=function(){
        'Get mindepth'
        if (!.loadme('mindepth')){
          .set_mindepth()
        }
        .self$mindepth
      },
      get_minsize=function(){
        'Get minsize'
        if (!.loadme('minsize')){
          .set_minsize()
        }
        .self$minsize
      },
      .get_regions=function(){
        'For internal use only.'
        if (!.loadme('regions')){
          .set_regions()
        }
        .self$regions
      },
      get_regions=function(){
        'Get regions.'
        .refid_to_refname(.get_regions(),.get_ref(),'chr')
      },
      get_segmtype=function(){
        'Get segtype, segmentation type(s).'
        if (!.loadme('segtype')){
          .set_segmtype()
        }
        .self$segtype
      },
      get_dsN=function(){
        'Get dsN'
        if (!.loadme('dsN')){
          .set_dsN()
        }
        .self$dsN
      },
      get_dsT=function(){
        'Get dsT'
        if (!.loadme('dsT')){
          .set_dsT()
        }
        .self$dsT
      },
      get_MoreArgs.cn=function(){
        'Get MoreArgs.cn'
        if (!.loadme('MoreArgs.cn')){
          .set_MoreArgs.cn()
        }
        .self$MoreArgs.cn
      },
      get_MoreArgs.segm=function(){
        'Get MoreArgs.segm'
        if (!.loadme('MoreArgs.segm')){
          .set_MoreArgs.segm()
        }
        .self$MoreArgs.segm
      },
      get_pcThreads=function(){
        'Get pcThreads'
        if (!.loadme('pcThreads')){
          .set_pcThreads()
        }
        .self$pcThreads
      },
      .get_windows=function(){
        'For internal use only.'
        ## via proc_normal
        if (!.loadme('windows','proc_normal')){
          proc_normal()          
        }
        if (!'windows' %in% names(out)){
          stop('NGScopy$.get_windows | windows is not available.')
        }
        out[['windows']]
      },
      get_windows=function(){
        'Get the windows.'
        res <- .get_windows()
        res <- as.data.frame(res)
        res <- .refid_to_refname(res,.get_ref(),'chr')
        res
      },
      get_size=function(){
        'Get the size per window.'
        ## via proc_normal
        if (!.loadme('size','proc_normal')){
          proc_normal()
        }
        if (!'size' %in% names(out)){
          stop('NGScopy$get_size | size is not available.')
        }
        out[['size']]
      },
      get_pos=function(){
        'Get the position (midpoint) per window.'
        ## via proc_normal
        if (!.loadme('pos','proc_normal')){
          proc_normal()
        }
        if (!'pos' %in% names(out)){
          stop('NGScopy$get_pos | pos is not available.')
        }
        out[['pos']]
      },
      .get_dataN=function(){
        'For internal use only.'
        ## via proc_normal
        if (!.loadme('dataN','proc_normal')){
          proc_normal()
        }
        if (!'dataN' %in% names(out)){
          stop('NGScopy$get_dataN | dataN is not available.')
        }
        out[['dataN']]
      },
      get_dataN=function(){
        'Get the data of the normal per window.'
        .dataN <- .get_dataN()
        colnames(.dataN) <- paste(colnames(.dataN),'.N',sep='')
        colnames(.dataN)[colnames(.dataN)=='depth.N'] <- "depthN"
        return(.dataN)        
      },
      get_depthN=function(){
        'Get the depth of the normal per window.'
        get_dataN()[,'depthN']
      },
      .get_dataT=function(){
        'For internal use only.'
        ## via proc_tumor
        if (!.loadme('dataT','proc_tumor')){
          proc_tumor()
        }
        if (!'dataT' %in% names(out)){
          stop('NGScopy$get_dataT | dataT is not available.')
        }
        out[['dataT']]
      },
      get_dataT=function(){
        'Get the data of the normal per window.'
        .dataT <- .get_dataT()
        colnames(.dataT) <- paste(colnames(.dataT),'.T',sep='')
        colnames(.dataT)[colnames(.dataT)=='depth.T'] <- "depthT"
        return(.dataT)        
      },
      get_depthT=function(){
        'Get the depth of the tumor per window.'
        get_dataT()[,'depthT']
      },
      get_cn=function(){
        'Get the copy number object.'
        ## via calc_cn
        if (!.loadme('cn','calc_cn')){
          calc_cn()
        }
        if (!'cn' %in% names(out)){
          stop('NGScopy$get_cn | cn is not available.')
        }
        out[['cn']]
      },
      get_cnr=function(){
        'Get the copy number ratios.'
        get_cn()[['cnr']]
      },      
      .get_data.cn=function(){
        'For internal use only.'
        ## via proc_cn
        if (!.loadme('data.cn','proc_cn')){
          proc_cn()
        }
        out[['data.cn']]
      },
      get_data.cn=function(as.granges=FALSE){
        'Get the data.frame of copy number.'
        ret <- .get_data.cn()
        ret <- ret[,colnames(ret)!='refid']
        if (as.granges){
          .ret <- df.to.gr(ret,chrlength=get_reflength())
          if (length(.ret)){
            ret <- .ret
          } else{
            stop('get_data.cn | Please set as.granges to FALSE.')
          }
        }
        ret
      },
      get_segm=function(){
        'Get the segmentation object.'
        if (!.loadme('seg','calc_segm')){
          calc_segm()
        }
        if (!'seg' %in% names(out)){
          stop('NGScopy$get_segm | seg is not available.')
        }
        out[['seg']]
      },
      .get_data.segm=function(){
        'For internal use only.'
        ## via proc_segm
        if (!.loadme('data.segm','proc_segm')){
          proc_segm()
        }
        if (!'data.segm' %in% names(out)){
          stop('NGScopy$.get_data.segm | data.segm is not available.')
        }
        out[['data.segm']]
      },
      get_data.segm=function(as.granges=FALSE){
        'Get the data.frame of segmentation.'
        ret <- .get_data.segm()
        ret <- lapply(ret,function(e) {e[!names(e) %in% c('plottype')]})
        ret <- do.call(rbind,lapply(names(ret),function(a) {cbind(ret[[a]],segtype=a)}))
        ret$segtype <- as.character(ret$segtype)
        if (as.granges){
          .ret <- df.to.gr(ret,chrlength=get_reflength())
          if (length(.ret)){
            ret <- .ret
          } else{
            stop('get_data.segm | Please set as.granges to FALSE.')
          }
        }
        ret
      },
      .proc_ref=function(){
        'For internal use only.'
        ## Set reference genome of the normal sample.
        if (.loadme('ref')){
          return()
        }
        stampmsg(".proc_ref | process reference genome (in bam header of the normal sample)")
        if (!length(inFpathN)){
          stop('.proc_ref | inFpathN must be set.')
        }
        out$ref <<- .parse_ref(inFpathN)
        out$refname <<- .ref_to_refname(out$ref)
        out$reflength <<- .ref_to_reflength(out$ref)
        if(auto.save){
          saveme()
        }
        invisible()  
      },
      .get_ref=function(){
        'For internal use only.'
        'Get reference genome in the normal sample.'
        .proc_ref()
        out$ref
      },
      .get_refname=function(){
        'For internal use only.'
        .proc_ref()
        out$refname
      },
      get_refname=function(){
        'Get reference genome name in the normal sample.'
        as.character(.get_refname())
      },
      .get_reflength=function(){
        'For internal use only.'
        .proc_ref()
        out$reflength
      },
      get_reflength=function(){
        'Get reference genome length in the normal sample.'
        ret <- .get_reflength()
        names(ret) <- as.character(.get_refname()[names(ret)])
        return(ret)
      },
      saveme=function(){
        'Save the output for later usage.'
        ## .self$out$inFpathN <- .self$inFpathN
        ## .self$out$inFpathT <- .self$inFpathT
        ## .self$out$outFpre <- .self$outFpre
        ## .self$out$libsizeN <- .self$libsizeN
        ## .self$out$libsizeT <- .self$libsizeT
        ## .self$out$mindepth <- .self$mindepth
        ## .self$out$minsize <- .self$minsize
        ## .self$out$regions <- .self$regions
        ## .self$out$segtype <- .self$segtype
        ## .self$out$dsN <- .self$dsN
        ## .self$out$dsT <- .self$dsT
        ## .self$out$pcThreads <- .self$pcThreads
        ## .self$out$auto.save <- .self$auto.save
        ## .self$out$auto.load <- .self$auto.load
        base::save(file=out$outFpath,out)
      },
      ## saveobj=function(){
      ##   'Save the class instance for later usage.'
      ##   obj <- .self
      ##   base::save(file=out$objFpath,obj)
      ## },
      loadme=function(){
        'Load a previously saved output.'
        if (is.file(out$outFpath)) {
          base::load(file=out$outFpath)
          .names <- names(out)
          .names <- .names[-grep('^\\.',.names)]
          for (a in .names){
            .self$out[[a]] <- out[[a]]
          }
        }
      },
      .loadme=function(x,method=NULL){
        ret <- FALSE
        flag <- FALSE
        if (!length(method)){
          flag <- TRUE
        } else{
          if(!method %in% force.rerun){
            flag <- TRUE            
          }
        }
        if (flag){
          if (!x %in% names(out)){
            if (auto.load) {
              loadme()
            }          
          }
          if (x %in% names(out)){
            ret <- TRUE
          }
        }
        return(ret)
      },
      proc_normal=function(){
        'Process the normal sample: make the windows and count the reads per window.'
        
        if (.loadme('dataN','proc_normal')) return()
        ##
        stampmsg("proc_normal | this may take a while depending on the size of your library.")
        ##logme(regions)
        .regions <- .get_regions()
        tmp <- .make_windows(regions=.regions,inFpath=get_inFpathN(),mindepth=get_mindepth(),minsize=get_minsize(),ds=get_dsN(),pcThreads=get_pcThreads())
        ##logme(head(tmp))
        stampmsg("Processed all ",if (!length(nrow(.regions))) 0 else nrow(.regions)," regions.")
        if (!nrow(tmp)){
          stop('proc_normal | no enough reads to fulfil requirements.')
        }
        out$windows <<- tmp[,1:3]
        out$size <<- tmp[,4]
        out$pos <<- ceiling((tmp[,2]+tmp[,3])/2)
        out$dataN <<- tmp[,-(1:4),drop=FALSE]
        if(auto.save){
          saveme()
        }
      }, 
      proc_tumor=function(){
        'Process the tumor sample: count the reads per window.'
        
        if (.loadme('dataT','proc_tumor')) return()
        ##
        if (!'windows' %in% names(out)){
          proc_normal()
        }
        ##
        ## .set_tumor()
        stampmsg("proc_tumor | this may take a while depending on the size of your library.")
        .windows <- .get_windows()
        tmp <- .count_starts(.windows,get_inFpathT(),ds=get_dsT(),pcThreads=get_pcThreads())
        stampmsg("Processed all ",if (!length(nrow(.windows))) 0 else nrow(.windows)," windows.")
        if (!nrow(tmp)){
          stop('proc_tumor | no enough reads to fulfil requirements.')
        }
        ## XB
        out$dataT <<- tmp
        if(auto.save){
          saveme()
        }
      },
      calc_cn=function(){
        'Calculate the relative copy number ratios (CNRs) per window, using the depth in the control (normal) sample as denominator and the case (tumor) sample as numerator.'

        if (.loadme('cn','calc_cn')) return()
        ##
        stampmsg("calc_cn")
        .MoreArgs.cn <- get_MoreArgs.cn()
        .MoreArgs.cn$depthN <- get_depthN()
        .MoreArgs.cn$depthT <- get_depthT()
        .MoreArgs.cn$libsizeN <- get_libsizeN()
        .MoreArgs.cn$libsizeT <- get_libsizeT()
        .MoreArgs.cn$dsN <- get_dsN()
        .MoreArgs.cn$dsT <- get_dsT()
        out$cn <<- do.call(.calc_cn,.MoreArgs.cn)
        
        ## out$cn <<- .calc_cn(
        ##   get_depthN(),get_depthT(),
        ##   libsizeN=get_libsizeN(),libsizeT=get_libsizeT(),
        ##   dsN=get_dsN(),dsT=get_dsT(),
        ##   ...
        ##   )
        ##logme(summary(out$cn))
        
        if(auto.save){
          saveme()
        }
        stampmsg('calc_cn | done')
        message(cat('out$cn:'))
        message(str(out$cn))
        message('')
      },
      proc_cn=function(){
        'Process the output of coy number object and return as a data.frame.'
        
        if (.loadme('data.cn','proc_cn')) return()
        ##
        stampmsg("proc_cn")
        
        tmp <- cbind(.get_windows(),size=get_size(),pos=get_pos(),get_dataN(),get_dataT(),cnr=get_cnr())
        tmp <- as.data.frame.matrix(tmp)
        ## tmp <- .refid_to_refname(tmp,.get_ref(),'chr') # replace refid
        tmp[,'chr'] <- .ref_to_refname(.get_ref())[as.character(tmp[,'refid'])] # keep refid
        tmp <- tmp[,unique(c('refid','chr','start','end','size','pos','depthN','depthT','cnr',colnames(tmp)))]
        ## logme(head(tmp))
        out$data.cn <<- tmp
        if(auto.save){
          saveme()
        }
      },
      calc_segm=function(){
        'Calculate the segment and detect the change points.'

        if (.loadme('seg','calc_segm')) return()
        ##
        if (!'data.cn' %in% names(out)){
          proc_cn()
        }
        ##
        stampmsg("calc_segm")
        
        out$seg <<- list()
        for (e in get_segmtype()){
          .MoreArgs.segm <- get_MoreArgs.segm()
          ## if (e %in% parse_segmtype()){        
          ##   .MoreArgs.segm$Q <- .MoreArgs.segm$maxnseg
          ##   .MoreArgs.segm$maxnseg <- NULL
          ## }
          .segm <- .calc_segm(.get_data.cn(),segtype=e,.ref=.get_ref(),.dots=.MoreArgs.segm,pcThreads=get_pcThreads())
          out$seg[[e]] <<- .segm
        }
        if(auto.save){
          saveme()
        }
        stampmsg('calc_segm | done')
        message(cat('out$seg:'))
        message(str(out$seg))
        message('')
      },
      proc_segm=function(){
        'Process the output of segmentation object and return as a data.frame.'        

        if (.loadme('data.segm','proc_segm')) return()
        ##
        stampmsg("proc_segm")
        out$data.segm <<- list()
        for (e in get_segmtype()){
          tmp <- .proc_segm(get_segm()[[e]],.get_data.cn(),pcThreads=get_pcThreads())
          out$data.segm[[e]] <<- tmp
        }
        if(auto.save){
          saveme()
        }
      },
      write_cn=function(cnFpath,...){
        'Write the output of copy numbers as a data.frame to a tab separated file. cnFpath: a file path (relative to outFpre) for `cn\' output; ...: see `Xmisc::write.data.table\'.'
        stampmsg("write_cn")
        out$.cnFpath <<- sprintf("%s/ngscopy_cn.txt",out$outFpre)
        if (missing(cnFpath)){
          cnFpath <- out$.cnFpath
        } else {
          cnFpath <- sprintf("%s/%s",out$outFpre,cnFpath)
        }
        tmp <- get_data.cn()
        write.data.table(cnFpath,tmp,...)
      },
      write_segm=function(segFpath,...){
        'Write the output of segments as a data.frame to a tab separated file. segFpath: a file path (relative to outFpre) for `seg\' output; ...: see `Xmisc::write.data.table\'.'
        stampmsg("write_segm")
        out$.segmFpath <<- sprintf("%s/ngscopy_segm.txt",out$outFpre)
        if (missing(segFpath)){
          segFpath <- out$.segmFpath
        } else {
          segFpath <- sprintf("%s/%s",out$outFpre,segFpath)
        }
        tmp <- get_data.segm()
        write.data.table(segFpath,tmp,...)
      },
      plot_out=function(
        pdfFpath,
        width,height,
        scales,
        xlim,ylim,xlab,ylab,
        ...,
        MoreArgs.plotcn,
        MoreArgs.plotseg
        ){
        'Plot the output and save to a pdf file. pdfFpath: a file path (relative to outFpre) for the pdf output; width,height: see `grDevices::pdf\'; scales: are scales shared across all chromossomes (i.e. x coordinates reflect the range of genomic coordinates per chromosome, y coordinates reflect the range of of CNRs per chromosome), given no specific `xlim\' and `ylim\' (the default, "fixed"), or do they vary across x coordinates ("free_x"), y coordinates ("free_y"), or both ("free"); xlim,ylim,xlab,ylab,...: see `graphics::plot\'; MoreArgs.plotcn: additional arguments as in `graphics::points\'; MoreArgs.plotseg: additional arguments as in `graphics::segments\'.'
        stampmsg("plot_out")
        
        if (missing(width)) width <- NULL
        if (missing(height)) height <- NULL
        if (missing(xlim)) xlim <- NULL
        if (missing(ylim)) ylim <- NULL
        if (missing(xlab)) xlab <- NULL
        if (missing(ylab)) ylab <- NULL
        if (missing(MoreArgs.plotcn)) MoreArgs.plotcn <- NULL
        if (missing(MoreArgs.plotseg)) MoreArgs.plotseg <- NULL
        
        scales <- .parse_scales(scales)[1]
        ## 
        reflength <- .get_reflength()
        data.cn <- .get_data.cn()
        data.segm <- .get_data.segm()
        out$.pdfFpath <<- sprintf("%s/ngscopy_out.pdf",out$outFpre)
        if (missing(pdfFpath)){
          pdfFpath <- out$.pdfFpath
        } else {
          pdfFpath <- sprintf("%s/%s",out$outFpre,pdfFpath)
        }
        ## 
        .plot_out(
          reflength=reflength,
          data.cn=data.cn,
          data.segm=data.segm,
          pdfFpath=pdfFpath,
          width=width,
          height=height,
          xlim=xlim,
          ylim=ylim,
          xlab=xlab,
          ylab=ylab,
          ...,
          MoreArgs.plotcn=MoreArgs.plotcn,
          MoreArgs.plotseg=MoreArgs.plotseg,
          scales=scales
          )
      },
      save_normal=function(){
        'Get the output of the normal for later usage.'
        ## save using a reserved variable name 
        ##..ngscopy.normal.. <- out[c('windows','dataN','size')]
        out$.normalFpath <<- sprintf("%s/ngscopy_normal.RData",get_outFpre())
        ..ngscopy.normal.. <-
          list(
            inFpathN=get_inFpathN(),
            libsizeN=get_libsizeN(),
            mindepth=get_mindepth(),
            minsize=get_minsize(),
            regions=.get_regions(),
            segtype=get_segmtype(),
            dsN=get_dsN(),
            windows=.get_windows(),
            size=get_size(),
            pos=get_pos(),
            depthN=get_depthN(),
            dataN=.get_dataN()
            ) ##XB
        base::save(file=out$.normalFpath,..ngscopy.normal..)
        logsave(out$.normalFpath)
      },
      load_normal=function(normalDpath){
        'Load a previously saved output of the normal. normalDpath: the path to the .RData file for the output of the normal.'
        out$.normalFpath <<- sprintf("%s/ngscopy_normal.RData",normalDpath)
        if (!is.file(out$.normalFpath)){
          stop('NGScopy$load_normal | the output of the normal is not available.')
        }
        base::load(file=out$.normalFpath)
        out$inFpathN <<- ..ngscopy.normal..$inFpathN
        out$libsizeN <<- ..ngscopy.normal..$libsizeN
        out$mindepth <<- ..ngscopy.normal..$mindepth
        out$minsize <<- ..ngscopy.normal..$minsize
        out$regions <<- ..ngscopy.normal..$regions
        out$segtype <<- ..ngscopy.normal..$segtype
        out$dsN <<- ..ngscopy.normal..$dsN
        out$windows <<- ..ngscopy.normal..$windows
        out$size <<- ..ngscopy.normal..$size
        out$pos <<- ..ngscopy.normal..$pos
        out$depthN <<- ..ngscopy.normal..$depthN
        out$dataN <<- ..ngscopy.normal..$dataN

        .self$inFpathN <- ..ngscopy.normal..$inFpathN
        .self$libsizeN <- ..ngscopy.normal..$libsizeN
        .self$mindepth <- ..ngscopy.normal..$mindepth
        .self$minsize <- ..ngscopy.normal..$minsize
        .self$regions <- ..ngscopy.normal..$regions
        .self$segtype <- ..ngscopy.normal..$segtype
        .self$dsN <- ..ngscopy.normal..$dsN        
      },
      show=function(){
        'Show the class instance friendly.'
        .scipen <- options()$scipen
        options(scipen=9)
        if (length(.self$inFpathN)) cat0('inFpathN: ',.self$inFpathN)
        if (length(.self$inFpathT)) cat0('inFpathT: ',.self$inFpathT)
        if (length(.self$outFpre)) cat0('outFpre: ',.self$outFpre)
        if (length(.self$libsizeN)) cat0('libsizeN: ',.self$libsizeN)
        if (length(.self$libsizeT)) cat0('libsizeT: ',.self$libsizeT)
        if (length(.self$mindepth)) cat0('mindepth: ',.self$mindepth) 
        if (length(.self$minsize)) cat0('minsize: ',.self$minsize)
        if (nrow(.self$regions)) {
          cat0('regions: \n',dfconcat(.refid_to_refname(
          head(.self$regions),.get_ref(),refname='chr'
          )))
          if (nrow(.self$regions)>6){
            cat0('... ...')
          }
        }
        if (length(.self$segtype)) cat0('segtype: ',vconcat(.self$segtype,capsule=TRUE,quote=TRUE))
        if (length(.self$dsN)) cat0('dsN: ',.self$dsN)
        if (length(.self$dsT)) cat0('dsT: ',.self$dsT)
        if (length(.self$MoreArgs.cn)){
          cat0('MoreArgs.cn:')
          cat0(str(.self$MoreArgs.cn))
        }
        if (length(.self$MoreArgs.segm)){
          cat0('MoreArgs.segm:')
          cat0(str(.self$MoreArgs.segm))
        }
        if (length(.self$auto.save)) cat0('auto.save: ',.self$auto.save)
        if (length(.self$auto.load)) cat0('auto.load: ',.self$auto.load)        
        options(scipen=.scipen)    
      }
      )
    )



##' Read regions from a data.frame, a file or a connection.
##'
##' 
##' @title Read regions from a data.frame, a file or a connection.
##' @param x data.frame or character, the regions in study.
##' A data.frame with three columns (chr/start/end), a file path or a
##' connection with three columns (chr/start/end) without column names.
##' Return an empty data.frame if "", "NULL" or NULL.
##' @return data.frame of three columns (chr/start/end)
##' @author Xiaobei Zhao
##' @examples
##' read_regions("
##' chr1 0 249250621
##' chr2 0 243199373
##' chr3 0 198022430
##' chr4 0 191154276
##' chr5 0 180915260
##' chr6 0 171115067
##' ")
read_regions <- function(x){
  x <- .check_regions(x)
  if (is.data.frame(x)){
    return(x)
  }
  ret <- read.table(
    x,header=FALSE,
    col.names=c('chr','start','end'),
    colClasses=c('character','numeric','numeric')
    )
  try(close(x),silent=TRUE)
  return(ret)
}


##' Parse the type of segmentation or return all available types
##' with missing input.
##' 
##' @title Parse the type of segmentation
##' @param segtype character, the type of segmentation.
##' Return all available types if missing.
##' @return the type of segmentation
##' @author Xiaobei Zhao
##' @examples
##' parse_segmtype()
parse_segmtype <- function(segtype){
  .parse_segmtype(segtype)
}


##' Get help for segmentation functions given the type of
##' segmentation.
##'
##' 
##' @title Get help for segmentation functions
##' @param segtype character, the type of segmentation.
##' See \code{parse_segmtype()}
##' @return NULL
##' @author Xiaobei Zhao
##' @examples
##' \dontrun{
##' help_segmtype(parse_segmtype()[1])
##' }
help_segmtype <- function(segtype){
  .help_segmtype(segtype)
}


##' Convert a data.frame to a GRanges object
##'
##' 
##' @title Convert a data.frame to a GRanges object
##' @param x data.frame or matrix
##' @param which.chr character, the column name of `chr`
##' @param which.start character, the column name of `start`
##' @param which.end character, the column name of `end`
##' @param which.width character, the column name of `width`
##' @param which.name character, the column name of `name`
##' @param chrlength numeric, the lengths of the chromosomes
##' @param start0 logical, wheter the `start` is 0-based.
##' @return a GRanges object
##' @author Xiaobei Zhao
##' @examples
##' \dontrun{
##' x <- data.frame(
##'   chr=c('chr1','chr2'),start=0,end=100,
##'   name=paste0('ID',1:2),score=1:2
##'   )
##' df.to.gr(x)
##' df.to.gr(x,chrlength=c(chr1=1000,chr2=1200))
##' }
df.to.gr <- function(
  x,
  which.chr='chr',which.start='start',which.end='end',
  which.width='width',which.name='name',
  chrlength=NULL,
  start0=TRUE
  ){
  
  if (!.check.packages("GenomicRanges")){
    message('df.to.gr | Package GenomicRanges is not installed.')
    Rle <- NULL
    IRanges <- NULL
    GRanges <- NULL
    `seqlengths<-` <- NULL
    return()
  } 
  ## logme(search(),'df.to.gr | 1')
  x <- as.data.frame(x)
  n <- nrow(x)
  .x <- as.list(x)
  .x[[which.chr]] <- NULL
  .x[[which.start]] <- NULL
  .x[[which.end]] <- NULL
  .x[[which.width]] <- NULL
  .x[[which.name]] <- NULL
  if (length(x[[which.start]]) & start0){
    x[[which.start]] <- x[[which.start]] + 1
  }
  .x$seqnames <- Rle(x[[which.chr]],rep(1,n))
  .x$ranges <- IRanges(x[[which.start]],x[[which.end]],width=x[[which.width]],names=x[[which.name]])
  
  gr <- do.call(GRanges,.x)
  if(length(chrlength) & length(x[[which.chr]])){
    if(length(names(chrlength))){
      chrlength <- chrlength[names(chrlength) %in% as.character(x[[which.chr]])]
    }
    seqlengths(gr) <- chrlength
  }
  ## logme(search(),'df.to.gr | 2')
  return(gr)  
}

Try the NGScopy package in your browser

Any scripts or data that you put into this service are public.

NGScopy documentation built on May 6, 2019, 2:43 a.m.