#' loadVcfFileForGene
#'
#' Function to load vcf file for a particular gene and transcrit, given a transcript object as
#' returned by VEP EPI. Requires: VariantAnnotation, BiocGenerics
#' @param vcfFile File name to load
#' @param transcript VEP transcript object or list. Needs to contain 'id', 'seq_region_name','start' and 'end' elements
#' @param includeFilteredSites Return filtered-out sites (i.e. sites marked with anything other than PASS on vcf). Defaults to FALSE
#' @param n Max number of records to load. Set to -1 for no limit.
#' @param refVersion Reference version. Defaults to "GRCh37".
#' @param returnGenotypes Return genotype data. Default to FALSE.
#' @keywords cats
#' @export
#' @examples
#' exacdata<- loadVcfFileForGene('exac.vcf',transcript)
loadVcfFileForGene <- function(vcfFile, transcript,
includeFilteredSites=F,
n=-1,
refVersion="GRCh37",
returnGenotypes=F){
chr <- transcript$seq_region_name
start <- transcript$start
end <- transcript$end
if (start > end) {
tmp=end
end=start
start=tmp
}
vdata <- loadVcfFileForRegion(vcfFile, chr, start, end, includeFilteredSites,n,refVersion,returnGenotypes)
vdata
}
#' loadVcfFileForRegion
#'
#' Function to load vcf file for a particular genomic region
#' @param vcfFile File name to load
#' @param chr Chromosome/contig to load
#' @param start Initial genomic position to load
#' @param end Final genomic position to load
#' @param includeFilteredSites Return filtered-out sites (i.e. sites marked with anything other than PASS on vcf). Defaults to FALSE
#' @param n Max number of records to load. Set to -1 for no limit.
#' @param refVersion Reference version. Defaults to "GRCh37".
#' @param returnGenotypes Return genotype data. Default to FALSE.
#' @importFrom GenomicRanges as.data.frame
#' @keywords cats
#' @export
#' @examples
#' exacdata<- loadVcfFileForRegion('exac.vcf',"1",12345600,13345600)
loadVcfFileForRegion <- function(vcfFile, chr, start, end,
includeFilteredSites=F,
n=-1,
refVersion="GRCh37",
returnGenotypes=F){
vRanges <- GRanges(as.character(chr),
IRanges(start=start, end=end,names = as.character(chr)))
tf <- TabixFile(vcfFile)
vdat <- readVcf(tf,refVersion, vRanges,row.names=F)
r <- rowRanges(vdat)
# clobber row names in case there are duplicate row entries which will make data frame conversion fail
#names(r) <- as.character(1:length(names(r)))
vRanges <- as.data.frame(r)
vInfo <- info(vdat)
vData <- cbind(vRanges,vInfo)
if (!includeFilteredSites) {
vData <- vData[vData$FILTER=="PASS",]
}
if (n>0) vData<-vData[1:n,]
if (returnGenotypes) {
list(siteData=vData,genotypeData=geno(vdat)$GT, sampleData=colData(vdat))
}
else vData
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.