inst/doc/genoset.R

## ----style, echo = FALSE, results='hide'--------------------------------------
BiocStyle::markdown()

## ----objectcreation-----------------------------------------------------------
library(genoset)

sample.names = LETTERS[11:13]
probe.names = paste("p", 1:1000, sep="")
num.samples = length(sample.names)
num.probes  = length(probe.names)

locs = GRanges(
  ranges= IRanges(
      start=c(seq(from=125e6,by=3e4,length=400), seq(from=1,length=400,by=3.25e4),
              seq(from=30e6,length=200,by=3e4)),width=1,names=probe.names),
  seqnames=factor(c(rep("chr8",400), rep("chr12",400),rep("chr17",200)),levels=c("chr8","chr12","chr17")))

fake.cn = matrix(c(
  c(rnorm(200,.4,0.05),rnorm(200,.2,0.05),rnorm(200,0.23,0.05),rnorm(200,.15,0.05),rnorm(200,2.,0.05)), 
  c(rnorm(200,0,0.05), rnorm(200,3,0.05),rnorm(200,14,0.05),rnorm(200,.1,0.05),rnorm(200,-0.05,0.05)),
  c(rnorm(200,.1,0.05),rnorm(200,1,0.05),rnorm(200,-.5,0.05),rnorm(200,3,0.05),rnorm(200,3,0.05))
  ),
  nrow=num.probes,ncol=num.samples,dimnames=list(probe.names,sample.names))
fake.pData=data.frame(matrix(LETTERS[1:15],nrow=3,ncol=5,dimnames=list(sample.names,letters[1:5])))

gs = GenoSet( rowRanges=locs, assays=list(cn=fake.cn), colData=fake.pData )
gs

rle.ds = GenoSet( rowRanges=locs,
                 assays=list(cn = fake.cn,
                             cn.segments=RleDataFrame(
                                 K=Rle(c(rep(1.5,300),rep(2.3,700))),L=Rle( c(rep(3.2,700),rep(2.1,300)) ),
                                 M=Rle(rep(1.1,1000)),row.names=rownames(fake.cn))),
                             colData=fake.pData
  )


## ----objectassaydata----------------------------------------------------------
names(assays(rle.ds))
head( rle.ds[,,"cn"] )
head( rle.ds[,,"cn.segments"] )

## ----accessgenomeinfo---------------------------------------------------------
head( rowRanges(gs) )
chrNames(gs)
chrOrder(c("chr12","chr12","chrX","chr8","chr7","chrY"))
chrInfo(gs)
chrIndices(gs)

head(chr(gs))
head(start(gs))
head(end(gs))
head(pos(gs))
head(genoPos(gs))

## ----genomeorder--------------------------------------------------------------
chrOrder(chrNames(gs))
gs = toGenomeOrder(gs, strict=TRUE)
isGenomeOrder(gs, strict=TRUE)

## ----subsetbychr--------------------------------------------------------------
chr12.ds = gs[ chrIndices(gs,"chr12"), ]
dim(chr12.ds)
chrIndices(chr12.ds)

## ----subsetbygene-------------------------------------------------------------
gene.gr = GRanges(ranges=IRanges(start=c(35e6,127e6),end=c(35.5e6,129e6),                       
                       names=c("HER2","CMYC")), seqnames=c("chr17","chr8"))
gene.ds = gs[ gene.gr, ]
dim(gene.ds)
chrIndices(gene.ds)

## ----subsetsamples------------------------------------------------------------
dim(gs[1:4,1:2])

## ----subsetassaydata----------------------------------------------------------
all( gs[ 1:4,1:2,"cn"] == assay(gs,"cn")[1:4,1:2] )

## ----GC, eval=FALSE-----------------------------------------------------------
#  library(BSgenome.Hsapiens.UCSC.hg19)
#  gc = rnorm(nrow(gs))
#  gs[,,"cn"] = gcCorrect(gs[,,"cn"],gc)

## ----cbsdirect----------------------------------------------------------------
library(DNAcopy)
cbs.cna = CNA(gs[,,"cn"], chr(gs), pos(gs) )
cbs.smoothed.CNA = smooth.CNA( cbs.cna )
cbs.segs = segment( cbs.cna )

## ----runCBS-------------------------------------------------------------------
gs[,,"cn.segs"] = runCBS(gs[,,"cn"],rowRanges(gs))

## ----cbsmulticore,eval=FALSE--------------------------------------------------
#  library(parallel)
#  gs[,,"cn.segs"] = runCBS(gs[, , "cn"],rowRanges(gs), n.cores=3)
#  gs[,,"cn.segs"][1:5,1:3]

## ----segments-----------------------------------------------------------------
head( gs[,,"cn.segs"] )

segs = segTable( gs[,2,"cn.segs"], rowRanges(gs) )
list.of.segs = segTable( gs[,,"cn.segs"], rowRanges(gs) )
rbind.list.of.segs = segTable( gs[,,"cn.segs"], rowRanges(gs), stack=TRUE )
two.kinds.of.segs = segPairTable( gs[,2,"cn.segs"], gs[,3,"cn.segs"], rowRanges(gs) )

rle = segs2Rle( segs, rowRanges(gs) )
rle.df = segs2RleDataFrame( list.of.segs, rowRanges(gs) )

bounds = matrix( c(1,3,4,6,7,10),ncol=2,byrow=TRUE)
cn = c(1,3,2)
rle = bounds2Rle( bounds, cn, 10 )

## ----plotgenome, echo=TRUE----------------------------------------------------
genoPlot(gs, gs[ , 1, "cn"])
genoPlot(gs, gs[ , 1, "cn.segs"], add=TRUE, col="red")

## ----plotchr, echo=TRUE-------------------------------------------------------
genoPlot(gs,gs[,1,"cn"],chr="chr12")
genoPlot(gs,gs[,1,"cn.segs"],chr="chr12",add=TRUE, col="red")

## ----plotchrsimple, eval=FALSE------------------------------------------------
#  chr12.ds = gs[chr(gs) == "chr12",]
#  genoPlot(pos(chr12.ds),chr12.ds[,1,"cn"],locs=rowRanges(chr12.ds))  # Numeric data and location
#  genoPlot(pos(chr12.ds),chr12.ds[,1,"cn.segs"],add=TRUE, col="red") # Rle data and numeric position

## ----mbafcutoff---------------------------------------------------------------
fake.baf  = sample(c(0,0.5,1), length(probe.names), replace=TRUE) + rnorm(length(probe.names),0,0.01)
fake.baf[ fake.baf > 1 ] = fake.baf[ fake.baf > 1 ] - 1
fake.baf[ fake.baf < 0 ] = fake.baf[ fake.baf < 0 ] + 1
hets = fake.baf < 0.75 & fake.baf > 0.25
fake.baf[ 101:200 ][ hets[101:200] ] = fake.baf[ 101:200 ][ hets[101:200] ] + rep(c(-0.2,0.2),50)[hets[101:200]]
fake.baf = matrix(fake.baf,nrow=num.probes,ncol=num.samples,dimnames=list(probe.names,sample.names))

baf.ds = GenoSet( rowRanges=locs, assays=list(lrr=fake.cn, baf=fake.baf), colData=fake.pData )
baf.ds[, , "mbaf"] = baf2mbaf(baf.ds[, , "baf"], hom.cutoff = 0.90)

## ----mbaftorle----------------------------------------------------------------
mbaf.data = RleDataFrame( sapply(colnames( baf.ds),
  function(x) { Rle( baf.ds[,x, "mbaf"] ) },
  USE.NAMES=TRUE, simplify=FALSE ) )
as.numeric(object.size( baf.ds[, ,"mbaf"]))  / as.numeric( object.size(mbaf.data))

## ----segment------------------------------------------------------------------
baf.ds[,,"baf.segs"] = runCBS( baf.ds[, ,"mbaf"], rowRanges(baf.ds) )
baf.ds[,,"lrr.segs"] = runCBS( baf.ds[, , "lrr"], rowRanges(baf.ds) )

## ----plotlrr------------------------------------------------------------------
genoPlot(baf.ds,baf.ds[,1,"lrr"],chr="chr12",main="LRR of chr12")
genoPlot(baf.ds,baf.ds[,1,"lrr.segs"],chr="chr12",add=TRUE,col="red")

## ----plotbaf, echo=TRUE-------------------------------------------------------
par(mfrow=c(2,1))
genoPlot(baf.ds,baf.ds[,1,"baf"],chr="chr12", main="BAF of chr12")
genoPlot(baf.ds,baf.ds[,1,"mbaf"],chr="chr12", main="mBAF of chr12")
genoPlot(baf.ds,baf.ds[,1,"baf.segs"],chr="chr12", add=TRUE,col="red")

## ----crosssample--------------------------------------------------------------
gain.list = lapply(colnames(baf.ds),
  function(sample.name) {
    as.logical( baf.ds[, sample.name, "lrr.segs"] > 0.3 )
})
gain.mat = do.call(cbind, gain.list)
gain.freq = rowMeans(gain.mat,na.rm=TRUE)

Try the genoset package in your browser

Any scripts or data that you put into this service are public.

genoset documentation built on Nov. 8, 2020, 6:07 p.m.