inst/doc/AllelicImbalance-vignette.R

## ----style, echo = FALSE, results = 'asis'------------------------------------
BiocStyle::markdown()

## ----functions, include=FALSE-------------------------------------------------
# A function for captioning and referencing images
fig <- local({
    i <- 0
    ref <- list()
    list(
        cap=function(refName, text) {
            i <<- i + 1
            ref[[refName]] <<- i
            #paste("Figure ", i, ": ", text, sep="")
            paste(text, sep="")
        },
        ref=function(refName) {
            ref[[refName]]
        })
})

## ----echo = TRUE, eval = TRUE, message = FALSE--------------------------------
library(AllelicImbalance)

## ----echo = TRUE, eval = TRUE-------------------------------------------------
searchArea <- GRanges(seqnames = c("17"), ranges = IRanges(79478301, 79478361))
pathToFiles <- system.file("extdata/ERP000101_subset", 
								package = "AllelicImbalance")
reads <- impBamGAL(pathToFiles, searchArea, verbose = FALSE)
heterozygotePositions <- scanForHeterozygotes(reads, verbose = FALSE)
countList <- getAlleleCounts(reads, heterozygotePositions, verbose = FALSE)
a.simple <- ASEsetFromCountList(heterozygotePositions, countList)
a.simple

## ----echo = TRUE, eval = TRUE, warning = FALSE--------------------------------
library(VariantAnnotation)
pathToVcf <- paste(pathToFiles,"/ERP000101.vcf",sep="")
VCF <- readVcf(pathToVcf,"hg19") 
gr <- granges(VCF) 

#only use bi-allelic positions
gr.filt <- gr[width(mcols(gr)[,"REF"]) == 1 | 
				unlist(lapply(mcols(gr)[,"ALT"],width))==1]

countList <- getAlleleCounts(reads, gr.filt, verbose=FALSE) 
a.vcf <- ASEsetFromCountList(rowRanges = gr, countList)

## ----echo = TRUE, eval = TRUE-------------------------------------------------
BcfGR <- impBcfGR(pathToFiles,searchArea,verbose=FALSE)
countListBcf <- getAlleleCounts(reads, BcfGR,verbose=FALSE)
a.bcf <- ASEsetFromCountList(BcfGR, countListBcf)

## ----echo = TRUE, eval = TRUE-------------------------------------------------
plus <- getAlleleCounts(reads, heterozygotePositions, strand="+",verbose=F) 
minus <- getAlleleCounts(reads, heterozygotePositions, strand="-",verbose=F)

a.stranded <-
ASEsetFromCountList(
heterozygotePositions,
countListPlus=plus,
countListMinus=minus
)
a.stranded

## ----echo = TRUE, eval = TRUE, message = FALSE, results = 'hide'--------------
#Getting searchArea from genesymbol
library(org.Hs.eg.db )
searchArea<-getAreaFromGeneNames("ACTG1",org.Hs.eg.db)

#Getting rs-IDs
library(SNPlocs.Hsapiens.dbSNP144.GRCh37)
gr <- rowRanges(a.simple)
updatedGRanges<-getSnpIdFromLocation(gr, SNPlocs.Hsapiens.dbSNP144.GRCh37)
rowRanges(a.simple)<-updatedGRanges

## ----echo=TRUE, eval=TRUE-----------------------------------------------------
#simulate phenotype data
pdata <- DataFrame(
	Treatment=sample(c("ChIP", "Input"),length(reads),replace=TRUE),
	Gender=sample(c("male", "female"),length(reads),replace=TRUE), 
	row.names=paste("individual",1:length(reads),sep=""))

#make new ASEset with pdata
a.new <- ASEsetFromCountList(
		heterozygotePositions,
		countList,
		colData=pdata)

#add to existing object
colData(a.simple) <- pdata

## ----echo=TRUE, eval=TRUE-----------------------------------------------------
#infer and add genotype require declaration of the reference allele 
ref(a.simple) <- randomRef(a.simple)
genotype(a.simple) <- inferGenotypes(a.simple)

#access to genotype information requires not only the information about the
#reference allele but also the alternative allele
alt(a.simple) <- inferAltAllele(a.simple)
genotype(a.simple)[,1:4]

## ----echo=TRUE, eval=TRUE-----------------------------------------------------
#construct an example phase matrix 
set.seed(1)
rows <-nrow(a.simple)
cols <- ncol(a.simple)
p1 <- matrix(sample(c(1,0),replace=TRUE, size=rows*cols), nrow=rows, ncol=cols)
p2 <- matrix(sample(c(1,0),replace=TRUE, size=rows*cols), nrow=rows, ncol=cols)
phase.mat <- matrix(paste(p1,sample(c("|","|","/"), size=rows*cols, 
						replace=TRUE), p2, sep=""),	nrow=rows, ncol=cols)

phase(a.simple) <- phase.mat 

## ----echo=TRUE, eval=TRUE-----------------------------------------------------
#load VariantAnnotation and use the phase information from a Vcf file
pathToVcf <- system.file("extdata/ERP000101_subset/ERP000101.vcf", 
							package = "AllelicImbalance")
p <- readGT(pathToVcf)
#The example Vcf file contains only 19 out of our 20 samples
#So we have to subset and order
a.subset <- a.simple[,colnames(a.simple) %in% colnames(p)]
p.subset <- p[, colnames(p) %in% colnames(a.subset)]
p.ordered <- p.subset[ , match(colnames(a.subset), colnames(p.subset))]

## ----echo=TRUE, eval=TRUE-----------------------------------------------------
#from simulated data
ref(a.simple) <- c("G","T","C")

#infer and set alternative allele
alt <- inferAltAllele(a.simple)
alt(a.simple) <- alt

#from reference genome
data(ASEset.sim)
fasta <- system.file('extdata/hg19.chr17.subset.fa', package='AllelicImbalance')
ref <- refAllele(ASEset.sim,fasta=fasta) 
ref(ASEset.sim) <- ref

## ----echo=TRUE, eval=TRUE-----------------------------------------------------
#make an example countList including a global sample of SNPs
set.seed(1)
countListUnknown <- list()
samples <- paste(rep("sample",10),1:10,sep="")
snps <- 1000
for(snp in 1:snps){
	count<-matrix(0, nrow=length(samples), ncol=4, 
					dimnames=list(samples, c('A','T','G','C')))
	alleles <- sample(1:4, 2)
	for(sample in 1:length(samples)){
		count[sample, alleles] <- as.integer(rnorm(2,mean=50,sd=10))
	}
	countListUnknown[[snp]] <- count
}

#make example rowRanges for the corresponding information
gr <- GRanges(
	seqnames = Rle(sample(paste("chr",1:22,sep=""),snps, replace=TRUE)),
	ranges = IRanges(1:snps, width = 1, names= paste("snp",1:snps,sep="")),
	strand="*"
)

#make ASEset
a <- ASEsetFromCountList(gr, countListUnknown=countListUnknown)

#set a random allele as reference
ref(a) <- randomRef(a)
genotype(a) <- inferGenotypes(a)

#get the fraction of the reference allele
refFrac <- fraction(a, top.fraction.criteria="ref")

#check mean
mean(refFrac)


## ----echo=TRUE, eval=TRUE-----------------------------------------------------
#set ref allele
ref(a.stranded) <- c("G","T","C")

#binomial test
binom.test(a.stranded[,5:8], "+")
#chi-square test
chisq.test(a.stranded[,5:8], "-")

## ----echo=TRUE, eval=TRUE, message=FALSE--------------------------------------
# in this example every snp is on separate exons
region <- granges(a.simple)
rs <- regionSummary(a.simple, region)
rs
basic(rs)

## ----echo=TRUE, eval=TRUE, fig.cap=fig$cap("barplotPlusStrandDefault", "The red bars show how many reads with the G-allele that overlaps the snp at position chr17:79478331, and the green bars show how many reads with the T allele that overlaps.")----
barplot(a.stranded[2], strand="+", xlab.text="", legend.interspace=2)

## ----echo=TRUE, eval=TRUE, fig.cap=fig$cap("barplotPlusStrandPsub", "Here the default chi-square p-values have been replaced by p-values from binomial tests.")----
#use another source of p-values
btp <- binom.test(a.stranded[1],"+")
barplot(a.stranded[2], strand="+", testValue=t(btp), xlab.text="",
			 legend.interspace=2)

## ----echo=TRUE, eval=TRUE-----------------------------------------------------
init <- c(15,20)
for(i in c(1,2,4,6)){
	bp <- signif(binom.test(init*i, p=0.5)$p.value,3)
	cp <- signif(chisq.test(init*i, p=c(0.5,0.5))$p.value, 3)
	cat(paste("A: ", init[1]*i , " B: ", init[2]*i, " binom p: ", bp, "chisq p: ", cp,"\n"))
}

## ----echo=TRUE, eval=TRUE, fig.cap=fig$cap("barplotFraction", "A barplot with type='fraction'. Each bar represents one sample and by default the most expressed allele over all samples is on the top, here in green. The black line denotes 1:1 expression of the alleles.")----
barplot(a.simple[2], type="fraction", cex.main = 0.7)

## ----echo=TRUE, eval=TRUE, fig.cap=fig$cap("barplotSampleColour", "A barplot with allele fractions, additionally colored by gender.")----
#top and bottom colour
sampleColour.top <-rep("palevioletred",ncol(a.simple))
sampleColour.top[colData(a.simple)[,"Gender"]%in%"male"] <- "darkgreen"
sampleColour.bot <- rep("blue",ncol(a.simple))
sampleColour.bot[colData(a.simple)[,"Gender"]%in%"male"] <- "seashell2"
barplot(a.simple[2], type="fraction", sampleColour.top=sampleColour.top,
	 sampleColour.bot=sampleColour.bot, cex.main = 0.7)

## ----echo=TRUE, eval=TRUE, message=FALSE, fig.cap=fig$cap("barplotAnnotation", "A barplot with Gene information extracted from the org.Hs.eg.db package")----
library(org.Hs.eg.db)
barplot(a.simple[1],OrgDb=org.Hs.eg.db,
			cex.main = 0.7,
			xlab.text="",
			ypos.annotation = 0.08,
			annotation.interspace=1.3,
			legend.interspace=1.5
			)

## ----echo=TRUE, eval=TRUE, message=FALSE, fig.cap=fig$cap("barplotRef", "A barplot with the reference allele as top fraction, top.fraction.criteria='ref' ")----
#load data
data(ASEset)

#using reference and alternative allele information
alt(ASEset) <- inferAltAllele(ASEset)
barplot(ASEset[1], type="fraction", strand="+", xlab.text="", 
			top.fraction.criteria="ref", cex.main=0.7)

## ----echo=TRUE, eval=TRUE, message=FALSE, fig.cap=fig$cap("barplotPhase", "A barplot with the maternal phase as top fraction, top.fraction.criteria='phase' ")----
#using phase information
phase(ASEset) <- phase.mat 
barplot(ASEset[1], type="fraction", strand="+", xlab.text="", 
			top.fraction.criteria="phase", cex.main=0.7)

## ----echo=TRUE, eval=TRUE, message=FALSE, fig.cap=fig$cap("locationplotDefault", "A locationplot with countbars displaying a region of SNPs")----
# locationplot using count type
a.stranded.sorted <- sort(a.stranded, decreasing=FALSE)
locationplot(a.stranded.sorted, type="count", cex.main=0.7, cex.legend=0.4)

## ----echo=TRUE, eval=TRUE, message=FALSE, fig.cap=fig$cap("locationplotAnnotation", "A locationplot with Gene and Transcript information extracted from the org.Hs.eg.db and TxDb.Hsapiens.UCSC.hg19.knownGene packages")----
# locationplot annotation
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
locationplot(a.stranded.sorted, OrgDb=org.Hs.eg.db, 
				TxDb=TxDb.Hsapiens.UCSC.hg19.knownGene)

## ----echo=TRUE, eval=TRUE, message=FALSE, fig.cap=fig$cap("gbarplotcount", "A gbarplot with type='count'")----
#gbarplots with type "count" 
gbarplot(ASEset, type="count")

## ----echo=TRUE, eval=TRUE, message=FALSE, fig.cap=fig$cap("gbarplotfraction", "A gbarplot with type='fraction'")----
# gbarplots with type "fraction" 
gbarplot(ASEset, type="fraction")

## ----echo=TRUE, eval=TRUE, message=FALSE, fig.cap=fig$cap("glocationplot", "A glocationplot, showing the ASE of SNPs within a defined region")----
#remember to set the genome
genome(ASEset) <- "hg19"

glocationplot(ASEset, strand='+')


## ----echo=TRUE, eval=TRUE, message=FALSE, fig.cap=fig$cap("glocationplot", "A custom built Gviz plot using the ASEDAnnotationTrack, showing the ASE of SNPs within a defined region together with read coverage information")----

#load Gviz package
library(Gviz)

#subset ASEset and reads
x <- ASEset[,1:2]
r <- reads[1:2]
seqlevels(r, pruning.mode="coarse") <- seqlevels(x)

GR <- GRanges(seqnames=seqlevels(x),
 		ranges=IRanges(start=min(start(x)), end=max(end(x))),
 		strand='+', genome=genome(x))

deTrack <- ASEDAnnotationTrack(x, GR=GR, type='fraction', strand='+')
covTracks <- CoverageDataTrack(x, BamList=r, strand='+') 
lst <- c(deTrack,covTracks)
sizes <- c(0.5,rep(0.5/length(covTracks),length(covTracks)))

plotTracks(lst, from=min(start(x)), to=max(end(x)), sizes=sizes, 
				col.line = NULL, showId = FALSE, main = 'main', cex.main = 1, 
				title.width = 1, type = 'histogram')

## ----echo = TRUE, eval = TRUE, message = FALSE--------------------------------
data(ASEset.old)
#this command doesnt work anymore, for some reason(Will ask the mail-list)
#ASEset.new <- updateObject(ASEset.old)

## ----echo=TRUE, eval=TRUE-----------------------------------------------------
sessionInfo()

Try the AllelicImbalance package in your browser

Any scripts or data that you put into this service are public.

AllelicImbalance documentation built on Nov. 8, 2020, 6:52 p.m.