# Global knitr chunk options: centre every figure and give it a width and
# height of 8.
knitr::opts_chunk$set(
  fig.align = "center",
  fig.width = 8,
  fig.height = 8
)
Data import is performed similarly to the other examples.
# Attach the packages used throughout this example.
library(vcfR)
library(pinfsc50)

# Locate the example VCF file that ships inside the pinfsc50 package.
vcf_file <- system.file(
  "extdata", "pinf_sc50.vcf.gz",
  package = "pinfsc50"
)

# Read the file into memory without progress output, then print the object.
vcf <- read.vcfR(vcf_file, verbose = FALSE)
vcf
# Extract the AD element of the genotype section for every variant and sample.
ad <- extract.gt(vcf, element = "AD")

# Split the AD strings into their first and second records.
# NOTE(review): the surrounding text treats these as the major and the second
# most abundant allele; confirm against masplit()'s sorting behaviour.
allele1 <- masplit(ad, record = 1)
allele2 <- masplit(ad, record = 2)

# Express each record as a proportion of the two records combined.
ad1 <- allele1 / (allele1 + allele2)
ad2 <- allele2 / (allele1 + allele2)
# Depth histograms of the first allele record for the first 12 samples, with
# each sample's 10% and 90% depth quantiles marked in red.
dp <- allele1

# Per-sample (column-wise) depth quantiles; row 1 = 10%, row 2 = 90%.
# Alternative cut-offs that were tried: probs = c(0.15, 0.95).
sums <- apply(dp, MARGIN = 2, quantile, probs = c(0.1, 0.9), na.rm = TRUE)

# 4 x 3 grid with tight inner margins and a small outer margin for the
# shared axis labels.
par(mfrow = c(4, 3))
par(mar = c(2, 2, 1, 1))
par(oma = c(1, 1, 0, 0))

for (i in 1:12) {
  # One-unit bins over 0..1000, but only depths 0..100 are displayed.
  hist(
    allele1[, i],
    breaks = seq(0, 1e3, by = 1),
    xlim = c(0, 100),
    col = 8,
    main = "",
    xlab = "",
    ylab = ""
  )
  title(main = colnames(allele1)[i])
  abline(v = sums[, i], col = 2)
}

# Shared axis labels drawn in the outer margin.
title(xlab = "Depth", line = 0, outer = TRUE, font = 2)
title(ylab = "Count", line = 0, outer = TRUE, font = 2)

# Restore the plotting layout and margins. The original chunk reset `mar` and
# `oma` but left the 4 x 3 grid active, unlike the other plotting chunks.
par(mfrow = c(1, 1))
par(mar = c(5, 4, 4, 2))
par(oma = c(0, 0, 0, 0))
# Depth histograms of the second allele record for the first 12 samples.
# Zero (and negative) depths are dropped before plotting.
par(mfrow = c(4, 3), mar = c(2, 2, 1, 1), oma = c(1, 1, 0, 0))

for (i in seq_len(12)) {
  tmp <- allele2[, i]
  tmp <- tmp[tmp > 0]
  hist(
    tmp,
    breaks = seq(0, 1e3, by = 1),
    xlim = c(0, 100),
    # ylim = c(0, 1000),
    col = 8,
    main = "",
    xlab = "",
    ylab = ""
  )
  # Sample names are taken from allele1's column names.
  title(main = colnames(allele1)[i])
}

# Axis labels shared by all panels, placed in the outer margin.
title(xlab = "Depth", line = 0, outer = TRUE, font = 2)
title(ylab = "Count", line = 0, outer = TRUE, font = 2)

# Put the graphics parameters back to a single panel with standard margins.
par(mfrow = c(1, 1), mar = c(5, 4, 4, 2), oma = c(0, 0, 0, 0))
# Filter genotypes on depth, keep heterozygotes only, recompute allele
# balance, and call the allele-balance peak per window and sample.

# Filter on depth quantiles.
# Per-sample 10% (row 1) and 90% (row 2) quantiles of the first allele record.
# The same thresholds are applied to both allele records below.
sums <- apply(allele1, MARGIN = 2, quantile, probs = c(0.1, 0.9), na.rm = TRUE)

# Allele 1: censor genotypes whose depth is below the lower quantile ...
dp2 <- sweep(allele1, MARGIN = 2, FUN = "-", sums[1, ])
#allele1[dp2 < 0] <- NA
vcf@gt[, -1][dp2 < 0 & !is.na(vcf@gt[, -1])] <- NA
# ... or above the upper quantile.
dp2 <- sweep(allele1, MARGIN = 2, FUN = "-", sums[2, ])
#allele1[dp2 > 0] <- NA
vcf@gt[, -1][dp2 > 0] <- NA

# Allele 2: same two cuts, using the thresholds derived from allele 1.
dp2 <- sweep(allele2, MARGIN = 2, FUN = "-", sums[1, ])
vcf@gt[, -1][dp2 < 0 & !is.na(vcf@gt[, -1])] <- NA
dp2 <- sweep(allele2, MARGIN = 2, FUN = "-", sums[2, ])
vcf@gt[, -1][dp2 > 0] <- NA

# Censor homozygotes.
gt <- extract.gt(vcf, element = "GT")
hets <- is_het(gt)
is.na(vcf@gt[, -1][!hets]) <- TRUE

# Extract allele depths again, now from the filtered genotypes.
ad <- extract.gt(vcf, element = "AD")
allele1 <- masplit(ad, record = 1)
allele2 <- masplit(ad, record = 2)
ad1 <- allele1 / (allele1 + allele2)
ad2 <- allele2 / (allele1 + allele2)

# Parameters
# Both values are used below and by the later plotting chunks, so they must
# be defined here. Other settings that were tried are kept for reference.
#winsize <- 1e5
winsize <- 2e5
#bin_width <- 0.1
#bin_width <- 0.05
#bin_width <- 0.025
# 0.02 matches the bar width used by the marginal barplots further down.
bin_width <- 0.02
#bin_width <- 0.01

# Find peaks
freq1 <- ad1 / (ad1 + ad2)
freq2 <- ad2 / (ad1 + ad2)

myPeaks1 <- freq_peak(freq1, getPOS(vcf), winsize = winsize, bin_width = bin_width)
#myCounts1 <- freq_peak(freq1, getPOS(vcf), winsize = winsize, bin_width = bin_width, count = TRUE)
# Discard peaks for window/sample cells with a count below 20.
is.na(myPeaks1$peaks[myPeaks1$counts < 20]) <- TRUE

# NOTE(review): lhs = FALSE is only passed for the second allele; confirm its
# meaning against the freq_peak() documentation.
myPeaks2 <- freq_peak(freq2, getPOS(vcf), winsize = winsize, bin_width = bin_width, lhs = FALSE)
#myCounts2 <- freq_peak(freq2, getPOS(vcf), winsize = winsize, bin_width = bin_width, count = TRUE)
is.na(myPeaks2$peaks[myPeaks2$counts < 20]) <- TRUE
Proof of concept. Major allele.
# Proof of concept for the major allele: a 4 x 4 grid with one row per sample
# and one column per window (windows 1-4). Each panel is the histogram of
# freq1 within that window, with the called peak drawn as a red line.
par(mfrow = c(4, 4), mar = c(2, 2, 1, 1), oma = c(1, 1, 0, 0))

for (mySample in c("BL2009P4_us23", "DDR7602", "IN2009T1_us22", "P17777us22")) {
  for (i in 1:4) {
    # Rows of freq1 that belong to window i.
    hist(
      freq1[myPeaks1$wins[i, "START_row"]:myPeaks1$wins[i, "END_row"], mySample],
      breaks = seq(0, 1, by = bin_width),
      xlim = c(0, 1),
      col = 8,
      main = "",
      xaxt = "n"
    )
    # Custom x axis labelled with the expected allele-balance fractions.
    axis(
      side = 1,
      at = c(0, 0.25, 0.333, 0.5, 0.666, 0.75, 1),
      labels = c(0, "1/4", "1/3", "1/2", "2/3", "3/4", 1),
      las = 1
    )
    abline(v = myPeaks1$peaks[i, mySample], col = 2)
    # Print the sample name once per row, above the second panel.
    if (i == 2) {
      title(main = mySample)
    }
  }
}

# Back to a single panel with standard margins.
par(mfrow = c(1, 1), mar = c(5, 4, 4, 2), oma = c(0, 0, 0, 0))
Second most abundant allele.
# Proof of concept for the second most abundant allele: a 4 x 4 grid with one
# row per sample and one column per window (windows 1-4). Each panel is the
# histogram of freq2 within that window, with the myPeaks2 peak in red.
#
# Window row ranges are taken from myPeaks1$wins for every sample, as in the
# other chunks of this document. The original fourth copy of this loop mixed
# myPeaks2$wins (start) with myPeaks1$wins (end).
# NOTE(review): this presumes both freq_peak() calls produce the same windows;
# confirm.
par(mfrow = c(4, 4))
par(mar = c(2, 2, 1, 1))
par(oma = c(1, 1, 0, 0))

for (mySample in c("BL2009P4_us23", "DDR7602", "IN2009T1_us22", "P17777us22")) {
  for (i in 1:4) {
    hist(
      freq2[myPeaks1$wins[i, "START_row"]:myPeaks1$wins[i, "END_row"], mySample],
      breaks = seq(0, 1, by = bin_width),
      xlim = c(0, 1),
      col = 8,
      main = "",
      xaxt = "n"
    )
    # Custom x axis labelled with the expected allele-balance fractions.
    axis(
      side = 1,
      at = c(0, 0.25, 0.333, 0.5, 0.666, 0.75, 1),
      labels = c(0, "1/4", "1/3", "1/2", "2/3", "3/4", 1),
      las = 1
    )
    abline(v = myPeaks2$peaks[i, mySample], col = 2)
    # Print the sample name once per row, above the second panel.
    if (i == 2) {
      title(main = mySample)
    }
  }
}

# Restore a single panel with standard margins.
par(mfrow = c(1, 1))
par(mar = c(5, 4, 4, 2))
par(oma = c(0, 0, 0, 0))
# Allele balance along the sequence for the first 12 samples (4 x 3 grid).
# Points are per-variant balances (light blue = freq1, dark blue = freq2);
# thick black segments are the per-window peaks from myPeaks1 and myPeaks2.
par(mfrow = c(4, 3), mar = c(2, 2, 1, 1), oma = c(1, 1, 0, 0))

for (i in seq_len(12)) {
  mySample <- colnames(freq1)[i]

  # Empty frame first, so the reference lines end up underneath the points.
  plot(
    getPOS(vcf), freq1[, mySample],
    ylim = c(0, 1),
    type = "n",
    yaxt = "n",
    main = mySample,
    xlab = "POS",
    ylab = "Allele balance"
  )
  axis(
    side = 2,
    at = c(0, 0.25, 0.333, 0.5, 0.666, 0.75, 1),
    labels = c(0, "1/4", "1/3", "1/2", "2/3", "3/4", 1),
    las = 1
  )
  abline(h = c(0.25, 0.333, 0.5, 0.666, 0.75), col = 8)

  # Semi-transparent points for both alleles.
  points(getPOS(vcf), freq1[, mySample], pch = 20, col = "#A6CEE344")
  points(getPOS(vcf), freq2[, mySample], pch = 20, col = "#1F78B444")

  # Window extents come from myPeaks1$wins for both sets of peaks.
  segments(
    x0 = myPeaks1$wins[, "START_pos"], y0 = myPeaks1$peaks[, mySample],
    x1 = myPeaks1$wins[, "END_pos"], lwd = 3
  )
  segments(
    x0 = myPeaks1$wins[, "START_pos"], y0 = myPeaks2$peaks[, mySample],
    x1 = myPeaks1$wins[, "END_pos"], lwd = 3
  )
}

# Back to a single panel with standard margins.
par(mfrow = c(1, 1), mar = c(5, 4, 4, 2), oma = c(0, 0, 0, 0))
# Allele balance for a single sample (column 2 of freq1): a scatter of balance
# against position on the left and a marginal histogram of the same values,
# drawn as horizontal bars, on the right.
i <- 2

# Two panels side by side; the scatter plot is four times as wide.
layout(matrix(1:2, nrow = 1), widths = c(4, 1))
par(mar = c(5, 4, 4, 0))

mySample <- colnames(freq1)[i]

# Empty frame first, so the reference lines end up underneath the points.
plot(getPOS(vcf), freq1[, mySample], ylim = c(0, 1), type = "n", yaxt = "n",
     main = mySample, xlab = "POS", ylab = "Allele balance")
axis(side = 2, at = c(0, 0.25, 0.333, 0.5, 0.666, 0.75, 1),
     labels = c(0, "1/4", "1/3", "1/2", "2/3", "3/4", 1), las = 1)
abline(h = c(0.25, 0.333, 0.5, 0.666, 0.75), col = 8)
points(getPOS(vcf), freq1[, mySample], pch = 20, col = "#A6CEE344")
points(getPOS(vcf), freq2[, mySample], pch = 20, col = "#1F78B444")

# Per-window peaks; window extents come from myPeaks1$wins for both alleles.
segments(x0 = myPeaks1$wins[, "START_pos"], y0 = myPeaks1$peaks[, mySample],
         x1 = myPeaks1$wins[, "END_pos"], lwd = 3)
segments(x0 = myPeaks1$wins[, "START_pos"], y0 = myPeaks2$peaks[, mySample],
         x1 = myPeaks1$wins[, "END_pos"], lwd = 3)

# Bin counts only; the bars are drawn by barplot() below.
bp1 <- hist(freq1[, mySample], breaks = seq(0, 1, by = bin_width), plot = FALSE)
bp2 <- hist(freq2[, mySample], breaks = seq(0, 1, by = bin_width), plot = FALSE)

par(mar = c(5, 1, 4, 2))
# Bar width follows bin_width so the bars span the same 0..1 range as the
# histogram breaks (previously hard-coded to 0.02).
barplot(height = bp1$counts, width = bin_width, space = 0, horiz = TRUE,
        add = FALSE, col = "#A6CEE3")
barplot(height = bp2$counts, width = bin_width, space = 0, horiz = TRUE,
        add = TRUE, col = "#1F78B4")

# Restore default margins and a single-panel layout.
par(mar = c(5, 4, 4, 2))
par(mfrow = c(1, 1))
# Allele balance for every sample: for each column of freq1, a scatter of
# balance against position on the left and a marginal histogram of the same
# values, drawn as horizontal bars, on the right.
# seq_len() makes the loop a no-op when freq1 has no columns.
for (i in seq_len(ncol(freq1))) {
  #i <- 18
  # Two panels side by side; the scatter plot is four times as wide.
  layout(matrix(1:2, nrow = 1), widths = c(4, 1))
  par(mar = c(5, 4, 4, 0))

  mySample <- colnames(freq1)[i]

  # Empty frame first, so the reference lines end up underneath the points.
  plot(getPOS(vcf), freq1[, mySample], ylim = c(0, 1), type = "n", yaxt = "n",
       main = mySample, xlab = "POS", ylab = "Allele balance")
  axis(side = 2, at = c(0, 0.25, 0.333, 0.5, 0.666, 0.75, 1),
       labels = c(0, "1/4", "1/3", "1/2", "2/3", "3/4", 1), las = 1)
  abline(h = c(0.25, 0.333, 0.5, 0.666, 0.75), col = 8)
  points(getPOS(vcf), freq1[, mySample], pch = 20, col = "#A6CEE344")
  points(getPOS(vcf), freq2[, mySample], pch = 20, col = "#1F78B444")

  # Per-window peaks; window extents come from myPeaks1$wins for both alleles.
  segments(x0 = myPeaks1$wins[, "START_pos"], y0 = myPeaks1$peaks[, mySample],
           x1 = myPeaks1$wins[, "END_pos"], lwd = 3)
  segments(x0 = myPeaks1$wins[, "START_pos"], y0 = myPeaks2$peaks[, mySample],
           x1 = myPeaks1$wins[, "END_pos"], lwd = 3)

  # Bin counts only; the bars are drawn by barplot() below.
  bp1 <- hist(freq1[, mySample], breaks = seq(0, 1, by = bin_width), plot = FALSE)
  bp2 <- hist(freq2[, mySample], breaks = seq(0, 1, by = bin_width), plot = FALSE)

  par(mar = c(5, 1, 4, 2))
  # Bar width follows bin_width so the bars span the same 0..1 range as the
  # histogram breaks (previously hard-coded to 0.02).
  barplot(height = bp1$counts, width = bin_width, space = 0, horiz = TRUE,
          add = FALSE, col = "#A6CEE3")
  barplot(height = bp2$counts, width = bin_width, space = 0, horiz = TRUE,
          add = TRUE, col = "#1F78B4")

  # Restore default margins and a single-panel layout before the next sample.
  par(mar = c(5, 4, 4, 2))
  par(mfrow = c(1, 1))
}
# Render the window table stored in myPeaks1 as a formatted table.
knitr::kable(myPeaks1[["wins"]])
# For every sample and every window, draw the freq2 histogram (left) and the
# freq1 histogram (right) with the called peak in red and a set of critical
# values in green.
par(mfrow = c(1, 2))
par(mar = c(2, 2, 1, 1))
par(oma = c(1, 1, 1, 0))

# Critical values: half the distance between neighbouring expected fractions
# (1/4 vs 1/3, and 1/2 vs 2/3) on either side of 1/4, 1/2 and 3/4.
critical <- c(
  1/4 - (1/3 - 1/4)/2,
  1/4 + (1/3 - 1/4)/2,
  1/2 - (2/3 - 1/2)/2,
  1/2 + (2/3 - 1/2)/2,
  3/4 - (1/3 - 1/4)/2,
  3/4 + (1/3 - 1/4)/2
)

# seq_len() keeps both loops from running when there are no samples or no
# windows; 1:0 would iterate over c(1, 0).
for (j in seq_len(ncol(freq1))) {
  mySample <- colnames(freq1)[j]
  for (i in seq_len(nrow(myPeaks1$wins))) {
    # Left panel: second allele. Window rows come from myPeaks1$wins.
    hist(freq2[myPeaks1$wins[i, "START_row"]:myPeaks1$wins[i, "END_row"], mySample],
         breaks = seq(0, 1, by = bin_width), xlim = c(0, 1), col = 8,
         main = "", xaxt = "n")
    axis(side = 1, at = c(0, 0.25, 0.333, 0.5, 0.666, 0.75, 1),
         labels = c(0, "1/4", "1/3", "1/2", "2/3", "3/4", 1), las = 3)
    abline(v = myPeaks2$peaks[i, mySample], col = 2, lwd = 2)
    abline(v = critical, col = "#228B22")

    # Right panel: first allele.
    hist(freq1[myPeaks1$wins[i, "START_row"]:myPeaks1$wins[i, "END_row"], mySample],
         breaks = seq(0, 1, by = bin_width), xlim = c(0, 1), col = 8,
         main = "", xaxt = "n")
    axis(side = 1, at = c(0, 0.25, 0.333, 0.5, 0.666, 0.75, 1),
         labels = c(0, "1/4", "1/3", "1/2", "2/3", "3/4", 1), las = 1)
    abline(v = myPeaks1$peaks[i, mySample], col = 2, lwd = 2)
    abline(v = critical, col = "#228B22")

    # Label the pair of panels with the window number and the sample name.
    mtext(paste("Window", i), side = 4)
    title(main = mySample, outer = TRUE)
  }
}

# Restore a single panel with standard margins.
par(mfrow = c(1, 1))
par(mar = c(5, 4, 4, 2))
par(oma = c(0, 0, 0, 0))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.