R/target_cnv_purity_corrected_calls.R

Defines functions target_cnv_calls

Documented in target_cnv_calls

#' CNV calls, corrected for purity
#'
#' This function loads the pureCN-corrected
#' copy number data as generated by cnvKit \code{call}.
#' The data were accessed from \code{/data/CCRBioinfo/projects/TargetOsteoDiscovery/report/summary.pureCN.cns.calls}
#' on December 13, 2018.
#'
#' @return
#'   A GRanges object describing regions of copy number change with
#'   metadata columns:
#'   \itemize{
#'   \item{sampleName: sample identifier (first column of the input file)}
#'   \item{log2: log2 copy number}
#'   \item{baf: average B-Allele-Frequency for the copy number region}
#'   \item{cn: total integer copy number}
#'   \item{cn1: integer copy number of allele 1}
#'   \item{cn2: integer copy number of allele 2}
#'   \item{depth: average depth of region}
#'   \item{probes: from cnvKit, the number of bins}
#'   \item{weight: from cnvKit}
#'   }
#'
#' @importFrom readr read_tsv
#' @import GenomicRanges
#'
#' @examples
#' x = target_cnv_calls()
#' head(x)
#' colnames(x)
#' # number of regions per sample
#' hist(as.vector(table(x$sampleName)))
#' hist(x$baf)
#' hist(x$log2)
#'
#' # Make a plot of log2 vs BAF for TP53 gene
#' \dontrun{
#' library(GenomicRanges)
#' library(TxDb.Hsapiens.UCSC.hg19.knownGene)
#' glocs = genes(TxDb.Hsapiens.UCSC.hg19.knownGene)
#' library(org.Hs.eg.db)
#' library(AnnotationDbi)
#' glocs$symbol = unlist(AnnotationDbi::select(org.Hs.eg.db,
#'     columns = "SYMBOL", keytype = "ENTREZID", keys = glocs$gene_id)$SYMBOL)
#' glocs = glocs[!is.na(glocs$symbol)]
#' tp53_regions = subsetByOverlaps(x, glocs[glocs$symbol=='TP53'])
#' plot(tp53_regions$log2, tp53_regions$baf)
#' }
#'
#' @export
target_cnv_calls = function() {
  # Purpose: read the bundled cnvKit call table from the package's extdata
  # directory and return it as a GRanges, one range per row, with the
  # non-coordinate columns attached as metadata columns.
  rel_path = 'extdata/summary.pureCN.cns.calls.tsv.gz'
  fname = system.file(rel_path, package='TargetOsteoAnalysis')

  # system.file() returns "" (it does not raise) when the package or file
  # cannot be found; stop here with a clear message instead of letting the
  # reader fail on an empty path.
  if (!nzchar(fname)) {
    stop("Could not find '", rel_path,
         "' in package 'TargetOsteoAnalysis'", call. = FALSE)
  }

  dat = read_tsv(fname, col_names = TRUE)

  # The coordinate columns are looked up by name, so verify they exist
  # before building ranges from them.
  coord_cols = c('chromosome', 'start', 'end')
  missing_cols = setdiff(coord_cols, colnames(dat))
  if (length(missing_cols) > 0) {
    stop("Input file is missing required column(s): ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  # NOTE(review): start/end are passed through unchanged. If the cnvKit
  # table uses 0-based starts, a +1 shift would be needed for GRanges'
  # 1-based closed intervals -- TODO confirm against the cnvKit format docs.
  gr = GRanges(seqnames = dat[['chromosome']],
               ranges=IRanges( start=dat[['start']], end=dat[['end']]))

  # The first column is exposed under the name 'sampleName'.
  colnames(dat)[1]='sampleName'

  # Everything except the coordinate columns becomes metadata. Selecting by
  # name (rather than by position 2:4) keeps this correct if the column
  # order of the file ever changes.
  keep_cols = setdiff(colnames(dat), coord_cols)
  mcols(gr) = dat[, keep_cols, drop = FALSE]
  gr
}
seandavi/TargetOsteoAnalysis documentation built on May 22, 2020, 8:23 p.m.