knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(fig.align = 'center')
knitr::opts_chunk$set(fig.width = 8)
knitr::opts_chunk$set(fig.height = 6)

Heterozygosity is one of the first perspectives on an individuals genetic constitution.

Data input

# Load libraries
library(vcfR)
library(pinfsc50)

# Determine file locations
vcf_file <- system.file("extdata", "pinf_sc50.vcf.gz",
                        package = "pinfsc50")

# Read data into memory
vcf <- read.vcfR(vcf_file, verbose = FALSE)
vcf

gt <- extract.gt(vcf, element = "GT")
ad <- extract.gt(vcf, element = "AD")
allele1 <- masplit(ad, record = 1)
allele2 <- masplit(ad, record = 2)
mySample <- 1
allele.df <- as.data.frame(allele1[,mySample] + allele2[,mySample])
names(allele.df) <- "sum"
allele.df$ratio <- allele1[,mySample]/allele.df$sum
allele.df$gt <- as.numeric(as.factor(gt[,mySample]))


library(RColorBrewer)

palette( paste(brewer.pal(name = "Dark2", n=8), "44", sep = "") )


plot(allele.df$sum, jitter(allele.df$ratio,amount = 0.002), col=allele.df$gt, pch = 20)

palette("default")

Process data

# Extract depths
dp <- extract.gt(vcf, element = 'DP', as.numeric = TRUE)
mudp <- apply(dp, MARGIN = 2, mean, na.rm = TRUE)

ad <- extract.gt(vcf, element = 'AD')

allele1 <- masplit(ad, record = 1)
allele2 <- masplit(ad, record = 2)

ad1 <- allele1 / (allele1 + allele2)
ad2 <- allele2 / (allele1 + allele2)

# Filter on depth quantiles.
sums <- apply(allele1, MARGIN=2, quantile, probs=c(0.1, 0.9), na.rm=TRUE)
# Allele 1
dp2 <- sweep(allele1, MARGIN=2, FUN = "-", sums[1,])
#allele1[dp2 < 0] <- NA
vcf@gt[,-1][ dp2 < 0 & !is.na(vcf@gt[,-1]) ] <- NA
dp2 <- sweep(allele1, MARGIN=2, FUN = "-", sums[2,])
#allele1[dp2 > 0] <- NA
vcf@gt[,-1][dp2 > 0] <- NA
# Allele 2
dp2 <- sweep(allele2, MARGIN=2, FUN = "-", sums[1,])
vcf@gt[,-1][ dp2 < 0 & !is.na(vcf@gt[,-1]) ] <- NA
dp2 <- sweep(allele2, MARGIN=2, FUN = "-", sums[2,])
vcf@gt[,-1][dp2 > 0] <- NA

# Censor homozygotes.
gt <- extract.gt(vcf, element = 'GT')
hets <- is_het(gt)
is.na( vcf@gt[,-1][ !hets ] ) <- TRUE

hettot <- colSums(hets)

Plot

par(mar=c(8,4,4,2))

barplot( hettot, las=3 )
title( ylab = "Heterozygous positions" )

barplot( mudp, las=3 )
title( ylab = "Mean depth (DP)" )

par(mar=c(5,4,4,2))
lm1 <- lm( hettot ~ mudp )
plot(mudp, hettot, xlab = "Mean DP", ylab = "Heterozygous positions")
abline(lm1)
summary(lm1)


knausb/vcfR_docs documentation built on May 20, 2019, 12:52 p.m.