inst/doc/methylKit.R

## ----setup, include=FALSE------------------------------------------------
knitr::opts_chunk$set(dpi = 75)
knitr::opts_chunk$set(cache = FALSE)

## ----eval=TRUE,echo=FALSE,warning=FALSE,message=FALSE--------------------
#devtools::load_all(".")

## ---- eval=TRUE, echo=FALSE----------------------------------------------
tab <- read.table( system.file("extdata", "test1.myCpG.txt", package = "methylKit"),header=TRUE,nrows=5)
tab
#knitr::kable(tab)

## ----message=FALSE-------------------------------------------------------
library(methylKit)
file.list=list( system.file("extdata", 
                            "test1.myCpG.txt", package = "methylKit"),
                system.file("extdata",
                            "test2.myCpG.txt", package = "methylKit"),
                system.file("extdata", 
                            "control1.myCpG.txt", package = "methylKit"),
                system.file("extdata", 
                            "control2.myCpG.txt", package = "methylKit") )


# read the files to a methylRawList object: myobj
myobj=methRead(file.list,
           sample.id=list("test1","test2","ctrl1","ctrl2"),
           assembly="hg18",
           treatment=c(1,1,0,0),
           context="CpG"
           )

## ---- message=FALSE,warning=FALSE----------------------------------------
library(methylKit)
file.list=list( system.file("extdata", "test1.myCpG.txt", package = "methylKit"),
                system.file("extdata", "test2.myCpG.txt", package = "methylKit"),
                system.file("extdata", "control1.myCpG.txt", package = "methylKit"),
                system.file("extdata", "control2.myCpG.txt", package = "methylKit") )


# read the files to a methylRawListDB object: myobjDB 
# and save in databases in folder methylDB
myobjDB=methRead(file.list,
           sample.id=list("test1","test2","ctrl1","ctrl2"),
           assembly="hg18",
           treatment=c(1,1,0,0),
           context="CpG",
           dbtype = "tabix",
           dbdir = "methylDB"
           )

print(myobjDB[[1]]@dbpath)



## ---- eval=FALSE---------------------------------------------------------
#  my.methRaw=processBismarkAln( location =
#                                  system.file("extdata",
#                                                  "test.fastq_bismark.sorted.min.sam",
#  	                                              package = "methylKit"),
#                           sample.id="test1", assembly="hg18",
#                           read.context="CpG", save.folder=getwd())

## ------------------------------------------------------------------------
getMethylationStats(myobj[[2]],plot=FALSE,both.strands=FALSE)

## ------------------------------------------------------------------------
getMethylationStats(myobj[[2]],plot=TRUE,both.strands=FALSE)

## ------------------------------------------------------------------------
getCoverageStats(myobj[[2]],plot=TRUE,both.strands=FALSE)

## ------------------------------------------------------------------------
filtered.myobj=filterByCoverage(myobj,lo.count=10,lo.perc=NULL,
                                      hi.count=NULL,hi.perc=99.9)

## ------------------------------------------------------------------------
meth=unite(myobj, destrand=FALSE)

## ------------------------------------------------------------------------
head(meth)

## ----eval=FALSE----------------------------------------------------------
#  # creates a methylBase object,
#  # where only CpGs covered with at least 1 sample per group will be returned
#  
#  # there were two groups defined by the treatment vector,
#  # given during the creation of myobj: treatment=c(1,1,0,0)
#  meth.min=unite(myobj,min.per.group=1L)

## ------------------------------------------------------------------------
getCorrelation(meth,plot=TRUE)

## ------------------------------------------------------------------------
clusterSamples(meth, dist="correlation", method="ward", plot=TRUE)

## ----message=FALSE-------------------------------------------------------
hc = clusterSamples(meth, dist="correlation", method="ward", plot=FALSE)

## ------------------------------------------------------------------------
PCASamples(meth, screeplot=TRUE)

## ------------------------------------------------------------------------
PCASamples(meth)

## ------------------------------------------------------------------------
# make some batch data frame
# this is a bogus data frame
# we don't have batch information
# for the example data
sampleAnnotation=data.frame(batch_id=c("a","a","b","b"),
                            age=c(19,34,23,40))

as=assocComp(mBase=meth,sampleAnnotation)
as

# construct a new object by removing the first pricipal component
# from percent methylation value matrix
newObj=removeComp(meth,comp=1)

## ------------------------------------------------------------------------
mat=percMethylation(meth)

# do some changes in the matrix
# this is just a toy example
# ideally you want to correct the matrix
# for batch effects
mat[mat==100]=80
 
# reconstruct the methylBase from the corrected matrix
newobj=reconstruct(mat,meth)

## ----warning=FALSE-------------------------------------------------------
tiles=tileMethylCounts(myobj,win.size=1000,step.size=1000)
head(tiles[[1]],3)

## ------------------------------------------------------------------------
myDiff=calculateDiffMeth(meth)

## ------------------------------------------------------------------------
# get hyper methylated bases
myDiff25p.hyper=getMethylDiff(myDiff,difference=25,qvalue=0.01,type="hyper")
#
# get hypo methylated bases
myDiff25p.hypo=getMethylDiff(myDiff,difference=25,qvalue=0.01,type="hypo")
#
#
# get all differentially methylated bases
myDiff25p=getMethylDiff(myDiff,difference=25,qvalue=0.01)

## ------------------------------------------------------------------------
diffMethPerChr(myDiff,plot=FALSE,qvalue.cutoff=0.01, meth.cutoff=25)

## ----eval=FALSE----------------------------------------------------------
#  
#  sim.methylBase1<-dataSim(replicates=6,sites=1000,
#                           treatment=c(rep(1,3),rep(0,3)),
#                          sample.ids=c(paste0("test",1:3),paste0("ctrl",1:3))
#                          )
#  
#  my.diffMeth<-calculateDiffMeth(sim.methylBase1[1:,],
#                                  overdispersion="MN",test="Chisq",mc.cores=1)

## ----eval=FALSE----------------------------------------------------------
#  
#  covariates=data.frame(age=c(30,80,34,30,80,40))
#  sim.methylBase<-dataSim(replicates=6,sites=1000,
#                          treatment=c(rep(1,3),rep(0,3)),
#                          covariates=covariates,
#                          sample.ids=c(paste0("test",1:3),paste0("ctrl",1:3))
#                          )
#  my.diffMeth3<-calculateDiffMeth(sim.methylBase,
#                                  covariates=covariates,
#                                  overdispersion="MN",test="Chisq",mc.cores=1)

## ---- eval=FALSE---------------------------------------------------------
#  myDiff=calculateDiffMeth(meth,mc.cores=2)

## ------------------------------------------------------------------------
library(genomation)

# read the gene BED file
gene.obj=readTranscriptFeatures(system.file("extdata", "refseq.hg18.bed.txt", 
                                           package = "methylKit"))
#
# annotate differentially methylated CpGs with 
# promoter/exon/intron using annotation data
#
annotateWithGeneParts(as(myDiff25p,"GRanges"),gene.obj)

## ------------------------------------------------------------------------
# read the shores and flanking regions and name the flanks as shores 
# and CpG islands as CpGi
cpg.obj=readFeatureFlank(system.file("extdata", "cpgi.hg18.bed.txt", 
                                        package = "methylKit"),
                           feature.flank.name=c("CpGi","shores"))
#
# convert methylDiff object to GRanges and annotate
diffCpGann=annotateWithFeatureFlank(as(myDiff25p,"GRanges"),
                                    cpg.obj$CpGi,cpg.obj$shores,
                         feature.name="CpGi",flank.name="shores")

## ------------------------------------------------------------------------
promoters=regionCounts(myobj,gene.obj$promoters)

head(promoters[[1]])

## ------------------------------------------------------------------------
diffAnn=annotateWithGeneParts(as(myDiff25p,"GRanges"),gene.obj)

# target.row is the row number in myDiff25p
head(getAssociationWithTSS(diffAnn))

## ------------------------------------------------------------------------
getTargetAnnotationStats(diffAnn,percentage=TRUE,precedence=TRUE)

## ------------------------------------------------------------------------
plotTargetAnnotation(diffAnn,precedence=TRUE,
    main="differential methylation annotation")

## ------------------------------------------------------------------------
plotTargetAnnotation(diffCpGann,col=c("green","gray","white"),
       main="differential methylation annotation")

## ------------------------------------------------------------------------
getFeatsWithTargetsStats(diffAnn,percentage=TRUE)

## ------------------------------------------------------------------------
class(meth)
as(meth,"GRanges")
class(myDiff)
as(myDiff,"GRanges")

## ------------------------------------------------------------------------
class(myobjDB[[1]])

## ----eval=FALSE----------------------------------------------------------
#  as(myobjDB[[1]],"methylRaw")

## ----eval=FALSE----------------------------------------------------------
#  data(methylKit)
#  
#  objDB=makeMethylDB(methylBase.obj,"exMethylDB")
#  

## ------------------------------------------------------------------------
select(meth,1:5) # get first 10 rows of a methylBase object
myDiff[21:25,] # get 5 rows of a methylDiff object

## ----message=FALSE,warning=FALSE,eval=FALSE------------------------------
#  library(GenomicRanges)
#  my.win=GRanges(seqnames="chr21",
#                 ranges=IRanges(start=seq(from=9764513,by=10000,length.out=20),width=5000) )
#  
#  # selects the records that lie inside the regions
#  selectByOverlap(myobj[[1]],my.win)

## ----eval=FALSE----------------------------------------------------------
#  # creates a new methylRawList object
#  myobj2=reorganize(myobj,sample.ids=c("test1","ctrl2"),treatment=c(1,0) )
#  # creates a new methylBase object
#  meth2 =reorganize(meth,sample.ids=c("test1","ctrl2"),treatment=c(1,0) )

## ---- eval=FALSE---------------------------------------------------------
#  # creates a matrix containing percent methylation values
#  perc.meth=percMethylation(meth)

## ---- eval=FALSE---------------------------------------------------------
#   download.file("https://github.com/BIMSBbioinfo/compgen2018/raw/master/day3_diffMeth/data/H1.chr21.chr22.rds",
#                 destfile="H1.chr21.chr22.rds",method="curl")
#  
#   mbw=readRDS("H1.chr21.chr22.rds")
#  
#   # it finds the optimal number of componets as 6
#   res=methSeg(mbw,diagnostic.plot=TRUE,maxInt=100,minSeg=10)
#  
#   # however the BIC stabilizes after 4, we can also try 4 componets
#   res=methSeg(mbw,diagnostic.plot=TRUE,maxInt=100,minSeg=10,G=1:4)
#  
#   # get segments to BED file
#   methSeg2bed(res,filename="H1.chr21.chr22.trial.seg.bed")
#  
#  

## ------------------------------------------------------------------------
sessionInfo() 

## ----eval=TRUE,echo=FALSE------------------------------------------------
# tidy up                  
rm(myobjDB)              
unlink(list.files(pattern = "methylDB",full.names = TRUE),recursive = TRUE)
qizhengyang2017/methylKit_1.8.1 documentation built on May 5, 2019, 7:58 p.m.