getCoverage: Get coverage count for positions of interest

Description Usage Arguments Value Examples

Description

Get coverage count for positions of interest

Usage

1
getCoverage(alleleInfo, bamFile, indexFile, verbose = F)

Arguments

alleleInfo

data.frame with positions as chr:pos, first column as reference amino acid, second column as alternative amino acid

bamFile

bam file

indexFile

bai index file

verbose

Boolean of whether or not to print progress and info

Value

totCount Total coverage count information for each position of interest

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
## Not run: 
Get germline hets from ExAC database or output from GATK for example
vcfFile <- "data-raw/ExAC.r0.3.sites.vep.vcf.gz"
# example with region of chromosome
chr <- 1
testRanges <- GRanges(chr, IRanges(start = 80000000, width=10000000))
param = ScanVcfParam(which=testRanges)
vcf <- readVcf(vcfFile, "hg19", param=param)
# common snps by MAF
info <- as.data.frame(info(vcf))
print(dim(info))
print(head(info))
maf <- info[, 'AF'] # AF is Integer allele frequency for each Alt allele
print("number of snps with maf > 0.1:")
vi <- sapply(maf, function(x) any(x > 0.1))
print(table(vi))
# convert to alleleInfo
snpsDf <- as.data.frame(rowData(vcf)[vi,])
alleleInfo <- data.frame(
    'contig' = paste0('chr', as.character(snpsDf[,1])),
     'position' = as.numeric(snpsDf[,2]),
     'ref_allele' = as.character(snpsDf$REF),
     'alt_allele' = sapply(snpsDf$ALT, function(i) paste(as.character(i), collapse=',')),
     stringsAsFactors = FALSE
)
alleleInfo <- cbind(alleleInfo, 'AF'=maf[vi])
# get rid of non single nucleotide changes
vi <- sapply(alleleInfo$ref_allele, nchar) == 1
alleleInfo <- alleleInfo[vi,]
# also gets rid of sites with multiple alt alleles though...hard to know which is in our patient
vi <- sapply(alleleInfo$alt_allele, nchar) == 1
alleleInfo <- alleleInfo[vi,]
# fix chromosome name
alleleInfo[,1] <- gsub('chr', '', alleleInfo[,1])
# Now that we have putative heterozygous germline SNPs
# we can get the coverage at these SNP sites from our bams
print("Getting coverage...")
path <- 'bams/'
files <- list.files(path = path)
files <- files[grepl('.bam$', files)]
cov <- do.call(cbind, lapply(files, function(f) {
    print(f)
    bamFile <- paste0(path, f)
    indexFile <- paste0(path, paste0(f, '.bai'))
    getCoverage(alleleInfo, bamFile, indexFile)
}))
colnames(cov) <- files

## End(Not run)

JEFworks/badger documentation built on May 7, 2019, 7:40 a.m.