inst/doc/RW2_CpG_sets.R

## ----CpGsetExample------------------------------------------------------------
cpgset = list(
            chr1 = c(12L, 57L, 123L),
            chr2 = c(45L, 95L, 99L, 111L),
            chr3 = c(22L, 40L, 199L, 211L))

## ----loadPackages, echo=FALSE, warning=FALSE, message=FALSE-------------------
suppressPackageStartupMessages(library(ramwas))
suppressPackageStartupMessages(library(BSgenome.Ecoli.NCBI.20080805))

## ----cpgsFromGenome, warning=FALSE, message=FALSE-----------------------------
library(ramwas)
library(BSgenome.Ecoli.NCBI.20080805)
cpgset = getCpGsetCG(BSgenome.Ecoli.NCBI.20080805)
# First 10 CpGs in NC_008253:
print(cpgset$NC_008253[1:10])

## ----getCpGsetALL1, eval=FALSE, warning=FALSE, message=FALSE------------------
#  library(BSgenome.Hsapiens.UCSC.hg19)
#  library(SNPlocs.Hsapiens.dbSNP144.GRCh37)
#  genome = injectSNPs(Hsapiens, "SNPlocs.Hsapiens.dbSNP144.GRCh37")
#  cpgset = getCpGsetALL(genome)
#  # Number of CpGs with all SNPs injected in autosomes
#  sum(sapply(cpgset[1:22], length))

## ----echo=FALSE---------------------------------------------------------------
42841152

## ----getCpGsetALL2, eval=FALSE------------------------------------------------
#  genome[["chr22"]] =
#      injectSNPsMAF(
#          gensequence = BSGenome[["chr22"]],
#          frqcount = "count_ALL_chr22.txt",
#          MAF = 0.01)
#  
#  # Find the CpGs
#  cpgset = getCpGsetALL(genome)

## ----save1, eval=FALSE--------------------------------------------------------
#  saveRDS(file = "My_cpgset.rds", object = cpgset)

## ----insilicoFASTQ, eval=FALSE------------------------------------------------
#  # Do for all chromosomes
#  insilicoFASTQ(
#      con="chr1.fastq.gz",
#      gensequence = BSGenome[["chr1"]],
#      fraglength=75)

## ----RaMWAS, eval=FALSE-------------------------------------------------------
#  library(ramwas)
#  chrset = paste0("chr",1:22)
#  targetcov = 75
#  covtolerance = 10
#  
#  param = ramwasParameters(
#      dirproject = ".",
#      dirbam = "./bams",
#      dirfilter = TRUE,
#      bamnames = chrset,
#      bam2sample = list(all_samples = chrset),
#      scoretag = "AS",
#      minscore = 100,
#      minfragmentsize = targetcov,
#      maxfragmentsize = targetcov,
#      minavgcpgcoverage = 0,
#      minnonzerosamples = 0,
#      # filecpgset - file with the CpG set being QC-ed
#      filecpgset = filecpgset
#  )
#  param1 = parameterPreprocess(param)
#  ramwas1scanBams(param)
#  ramwas3normalizedCoverage(param)

## ----filter, eval=FALSE-------------------------------------------------------
#  # Preprocess parameters to learn the location of coverage matrix
#  param1 = parameterPreprocess(param)
#  
#  # Load the coverage matrix (vector)
#  cover = fm.load( paste0(param1$dircoveragenorm, "/Coverage"))
#  
#  # split the coverage by chromosomes
#  # `cpgset` - the CpG set being QC-ed
#  fac = rep(seq_along(cpgset), times = sapply(cpgset, length))
#  levels(fac) = names(cpgset)
#  class(fac) = "factor"
#  cover = split(cover, fac)
#  
#  # filter CpGs on each chromosome by the coverage
#  cpgsetQC = cpgset
#  for( i in seq_along(cpgset) ){
#      keep =
#          (cover[[i]] >= (targetcov - covtolerance)) &
#          (cover[[i]] <= (targetcov + covtolerance))
#      cpgsetQC[[i]] = cpgset[[i]][ keep ]
#  }

## ----save2, eval=FALSE--------------------------------------------------------
#  saveRDS(file = "My_cpgset_QC.rds", object = cpgsetQC)

Try the ramwas package in your browser

Any scripts or data that you put into this service are public.

ramwas documentation built on Nov. 8, 2020, 8:24 p.m.