inst/doc/GenomeInfoDb.R

## ----style, eval=TRUE, echo=FALSE, results="asis"--------------------------
BiocStyle::latex()

## ----preliminaries, echo=FALSE, message=FALSE------------------------------
library(GenomeInfoDb)
library(TxDb.Dmelanogaster.UCSC.dm3.ensGene)

## ----genomeStyles1---------------------------------------------------------
seqmap <- genomeStyles()
head(seqmap,n=2)

## ----name------------------------------------------------------------------
names(genomeStyles())

## ----genomeStyles2---------------------------------------------------------
head(genomeStyles("Homo_sapiens"),5)

## ----style-present---------------------------------------------------------
"UCSC" %in% names(genomeStyles("Homo_sapiens"))

## ----extractSeqlevels------------------------------------------------------
extractSeqlevels(species="Arabidopsis_thaliana", style="NCBI")

## ----extractSeqlevelsgroup-------------------------------------------------
extractSeqlevelsByGroup(species="Arabidopsis_thaliana", style="NCBI",
                         group="auto")

## ----seqlevelsStyle--------------------------------------------------------
seqlevelsStyle(paste0("chr",c(1:30)))
seqlevelsStyle(c("2L","2R","X","Xhet"))

## ----keepChr-txdb----------------------------------------------------------
newchr <- paste0("chr",c(1:22,"X","Y","M","1_gl000192_random","4_ctg9_hap1"))
seqlevelsInGroup(newchr, group="sex")
seqlevelsInGroup(newchr, group="auto")
seqlevelsInGroup(newchr, group="circular")
seqlevelsInGroup(newchr, group="sex","Homo_sapiens","UCSC")

## ----check2----------------------------------------------------------------
seqnames <- c("chr1", "chr9", "chr2", "chr3", "chr10")
all(seqnames %in% extractSeqlevels("Homo_sapiens", "UCSC"))

## ----orderSeqlevels--------------------------------------------------------
seqnames <- c("chr1","chr9", "chr2", "chr3", "chr10")
orderSeqlevels(seqnames)
seqnames[orderSeqlevels(seqnames)]

## ----rankSeqlevels---------------------------------------------------------
seqnames <- c("chr1","chr9", "chr2", "chr3", "chr10")
rankSeqlevels(seqnames)

## ----find------------------------------------------------------------------
mapSeqlevels(c("chrII", "chrIII", "chrM"), "NCBI")

## ----basic-gr--------------------------------------------------------------
gr <- GRanges(paste0("ch",1:35), IRanges(1:35, width=5))
gr

## ----renameseqlevels-------------------------------------------------------
newnames <- paste0("chr",1:35)
names(newnames) <- paste0("ch",1:35)
head(newnames)
gr <- renameSeqlevels(gr,newnames)
gr

## ----dropseqlevels---------------------------------------------------------
dropSeqlevels(gr, paste0("chr",23:35), pruning.mode="coarse")

## ----keepseqlevels---------------------------------------------------------
keepSeqlevels(gr, paste0("chr",1:22), pruning.mode="coarse")

## ----keepstdchr------------------------------------------------------------
keepStandardChromosomes(gr, pruning.mode="coarse")

## ----keepstdchr-2----------------------------------------------------------
plantgr <- GRanges(c(1:5,"MT","Pltd"), IRanges(1:7,width=5))
keepStandardChromosomes(plantgr, species="Arabidopsis thaliana",
                                 pruning.mode="coarse")

## ----Seqinfo-egs-----------------------------------------------------------
## Note that all the arguments (except 'genome') must have the
## same length. 'genome' can be of length 1, whatever the lengths
## of the other arguments are.
x <- Seqinfo(seqnames=c("chr1", "chr2", "chr3", "chrM"),
             seqlengths=c(100, 200, NA, 15),
             isCircular=c(NA, FALSE, FALSE, TRUE),
             genome="toy")
length(x)
seqnames(x)
names(x)
seqlevels(x)
seqlengths(x)
isCircular(x)
genome(x)

x[c("chrY", "chr3", "chr1")]  # subset by names

## Rename, drop, add and/or reorder the sequence levels:
xx <- x
seqlevels(xx) <- sub("chr", "ch", seqlevels(xx))  # rename
xx
seqlevels(xx) <- rev(seqlevels(xx))  # reorder
xx
seqlevels(xx) <- c("ch1", "ch2", "chY")  # drop/add/reorder
xx
seqlevels(xx) <- c(chY="Y", ch1="1", "22")  # rename/reorder/drop/add
xx

y <- Seqinfo(seqnames=c("chr3", "chr4", "chrM"),
             seqlengths=c(300, NA, 15))
y
merge(x, y)  # rows for chr3 and chrM are merged
suppressWarnings(merge(x, y))

## Note that, strictly speaking, merging 2 Seqinfo objects is not
## a commutative operation, i.e., in general 'z1 <- merge(x, y)'
## is not identical to 'z2 <- merge(y, x)'. However 'z1' and 'z2'
## are guaranteed to contain the same information (i.e. the same
## rows, but typically not in the same order):
suppressWarnings(merge(y, x))

## This contradicts what 'x' says about circularity of chr3 and chrM:
isCircular(y)[c("chr3", "chrM")] <- c(TRUE, FALSE)
y
if (interactive()) {
  merge(x, y)  # raises an error
}

## ----quick-style-----------------------------------------------------------
txdb <- TxDb.Dmelanogaster.UCSC.dm3.ensGene
seqlevels(txdb)
genomeStyles("Drosophila melanogaster")
mapSeqlevels(seqlevels(txdb), "NCBI")

## ----sequence, eval=FALSE--------------------------------------------------
#  sequence <- seqlevels(x)
#  
#  ## sequence is in UCSC format and we want NCBI style
#  newStyle <- mapSeqlevels(sequence,"NCBI")
#  newStyle <- newStyle[complete.cases(newStyle)] # removing NA cases.
#  
#  ## rename the seqlevels
#  x <- renameSeqlevels(x,newStyle)
#  
#  ## keep only the seqlevels you want (say autosomes)
#  auto <- extractSeqlevelsByGroup(species="Homo sapiens", style="NCBI",
#                                  group="auto")
#  x <- keepSeqlevels(x,auto)

## ----sessionInfo, results='asis', eval=TRUE--------------------------------
toLatex(sessionInfo())

Try the GenomeInfoDb package in your browser

Any scripts or data that you put into this service are public.

GenomeInfoDb documentation built on April 9, 2021, 6 p.m.