# Global knitr chunk options for this document: echo the source of every
# chunk and render figures centred at 8 x 8 (knitr's figure size units).
knitr::opts_chunk$set(
  echo = TRUE,
  fig.align = "center",
  fig.width = 8,
  fig.height = 8
)
Creating histograms of allele balance can provide a quick perspective on the quality of a sequenced genome as well as insights into whether there may be ploidy variation within an individual. The creation of these plots is covered elsewhere. Here I demonstrate how to automate this process over all samples from a VCF file.
# Attach the packages used below: vcfR supplies read.vcfR(), and the example
# VCF file is looked up inside the installed pinfsc50 package.
library(vcfR)
library(pinfsc50)

# Resolve the on-disk location of the example VCF shipped with pinfsc50.
vcf_file <- system.file(
  "extdata", "pinf_sc50.vcf.gz",
  package = "pinfsc50"
)

# Read the file into memory without progress output, then show the object.
vcf <- read.vcfR(vcf_file, verbose = FALSE)
vcf
# Pull the AD (allele depth) element for every variant/sample, then split it
# into one matrix per allele: record 1 and record 2 of each AD entry.
ad <- extract.gt(vcf, element = "AD")
allele1 <- masplit(ad, record = 1)
allele2 <- masplit(ad, record = 2)
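Create an interval of acceptable allele depth for each sample (here, the 10th to 90th percentiles of the first allele's depth) and set genotypes whose allele depths fall outside this interval to missing.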
# Per-sample (column-wise) depth bounds: row 1 holds the 10th percentile and
# row 2 the 90th percentile of the first allele's depth, ignoring NAs.
# An earlier variant used probs = c(0.15, 0.95).
sums <- apply(
  allele1,
  MARGIN = 2,
  quantile,
  probs = c(0.1, 0.9),
  na.rm = TRUE
)

# Censor genotypes whose depth lies outside the bounds, first using allele 1
# and then allele 2. All columns of vcf@gt except the first are treated as
# sample columns (presumably column 1 is FORMAT -- confirm).
# NOTE(review): allele 2 is compared against bounds computed from allele 1
# only -- confirm that a single shared interval is intended.
for (depth in list(allele1, allele2)) {
  # Below the lower bound: depth minus the per-sample 10th percentile < 0.
  too_low <- sweep(depth, MARGIN = 2, STATS = sums[1, ], FUN = "-") < 0
  vcf@gt[, -1][too_low & !is.na(vcf@gt[, -1])] <- NA

  # Above the upper bound: depth minus the per-sample 90th percentile > 0.
  too_high <- sweep(depth, MARGIN = 2, STATS = sums[2, ], FUN = "-") > 0
  vcf@gt[, -1][too_high] <- NA
}

# Hard filter (disabled alternatives kept for reference)
# dp[dp < 4] <- NA
# vcf@gt[, -1][allele1 < 8] <- NA
# Genotype calls from the (now filtered) VCF, and a logical matrix marking
# which calls is_het() reports as heterozygous.
gt <- extract.gt(vcf, element = "GT")
hets <- is_het(gt)

# Blank out the allele depths of every call that is not heterozygous, so only
# heterozygotes contribute to the allele-balance histograms.
ad[!hets] <- NA

# Re-split the masked depths into per-allele matrices.
allele1 <- masplit(ad, record = 1)
allele2 <- masplit(ad, record = 2)

# Allele balance: each allele's share of the combined depth of both alleles.
depth_total <- allele1 + allele2
ad1 <- allele1 / depth_total
ad2 <- allele2 / depth_total
library(RColorBrewer)

# Fetch the 12-colour "Paired" palette once rather than on every hist() call.
paired_cols <- brewer.pal(n = 12, name = "Paired")

# Colour indices per sample: col1 cycles through 1, 3, 5, 7 and col2 is the
# neighbouring 2, 4, 6, 8, so each sample uses one adjacent pair of palette
# entries. Building the indices from seq_len() keeps them valid (empty) when
# there are no sample columns, where seq(1, 0, by = 2) would raise an error.
col1 <- (2 * seq_len(ncol(ad2)) - 1) %% 8
col2 <- col1 + 1

# Shared histogram bins: 50 bins of width 0.02 spanning [0, 1].
bin_breaks <- seq(0, 1, by = 0.02)

# One figure per sample: the second allele's balance is drawn first and the
# first allele's balance is overlaid on the same axes. seq_len() avoids the
# 1:0 trap of 1:ncol() when ad2 has zero columns.
for (i in seq_len(ncol(ad2))) {
  hist(
    ad2[, i],
    breaks = bin_breaks,
    col = paired_cols[col1[i]],
    xaxt = "n", # default x axis suppressed; custom axis drawn below
    main = colnames(ad2)[i],
    xlab = ""
  )
  hist(
    ad1[, i],
    breaks = bin_breaks,
    col = paired_cols[col2[i]],
    add = TRUE
  )
  # Label the x axis at the simple fractions marked below.
  axis(
    side = 1,
    at = c(0, 0.25, 0.333, 0.5, 0.666, 0.75, 1),
    labels = c(0, "1/4", "1/3", "1/2", "2/3", "3/4", 1)
  )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.