Description Usage Arguments Value Examples
Get alternative allele count for positions of interest
1 | getAlleleCount(alleleInfo, bamFile, indexFile, verbose = F)
|
alleleInfo |
data.frame with positions as chr:pos, first column as reference amino acid, second column as alternative amino acid |
bamFile |
bam file |
indexFile |
bai index file |
verbose |
Boolean of whether or not to print progress and info |
altAlleleCount alternative allele count information for each position of interest refAlleleCount reference allele count information for each position of interest
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 | ## Not run:
Get germline hets from ExAC database or output from GATK for example
vcfFile <- "data-raw/ExAC.r0.3.sites.vep.vcf.gz"
# example with region of chromosome
chr <- 1
testRanges <- GRanges(chr, IRanges(start = 80000000, width=10000000))
param = ScanVcfParam(which=testRanges)
vcf <- readVcf(vcfFile, "hg19", param=param)
# common snps by MAF
info <- as.data.frame(info(vcf))
print(dim(info))
print(head(info))
maf <- info[, 'AF'] # AF is Integer allele frequency for each Alt allele
print("number of snps with maf > 0.1:")
vi <- sapply(maf, function(x) any(x > 0.1))
print(table(vi))
# convert to alleleInfo
snpsDf <- as.data.frame(rowData(vcf)[vi,])
alleleInfo <- data.frame(
'contig' = paste0('chr', as.character(snpsDf[,1])),
'position' = as.numeric(snpsDf[,2]),
'ref_allele' = as.character(snpsDf$REF),
'alt_allele' = sapply(snpsDf$ALT, function(i) paste(as.character(i), collapse=',')),
stringsAsFactors = FALSE
)
alleleInfo <- cbind(alleleInfo, 'AF'=maf[vi])
# get rid of non single nucleotide changes
vi <- sapply(alleleInfo$ref_allele, nchar) == 1
alleleInfo <- alleleInfo[vi,]
# also gets rid of sites with multiple alt alleles though...hard to know which is in our patient
vi <- sapply(alleleInfo$alt_allele, nchar) == 1
alleleInfo <- alleleInfo[vi,]
# fix chromosome name
alleleInfo[,1] <- gsub('chr', '', alleleInfo[,1])
# Now that we have putative heterozygous germline SNPs
# we can get the coverage at these SNP sites from our bams
print("Getting allele counts...")
path <- 'bams/'
files <- list.files(path = path)
files <- files[grepl('.bam$', files)]
alleleCounts <- lapply(files, function(f) {
print(f)
bamFile <- paste0(path, f)
indexFile <- paste0(path, paste0(f, '.bai'))
getAlleleCount(alleleInfo, bamFile, indexFile)
})
altCounts <- do.call(cbind, lapply(1:length(alleleCounts), function(i) alleleCounts[[i]][[1]]))
refCounts <- do.call(cbind, lapply(1:length(alleleCounts), function(i) alleleCounts[[i]][[2]]))
colnames(altCounts) <- colnames(refCounts) <- files
## End(Not run)
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.