#' getClinvarData
#'
#' Function to load from HGMD MySQL database. Assumes data schema as published by HGMD. Requires RMySQL package
#' @param clinvarFile Gene name to load
#' @param transcript VEP transcript object or list. Needs to contain 'id', 'seq_region_name','start' and 'end' elements
#' @param password Return filtered-out sites (i.e. sites marked with anything other than PASS on vcf). Defaults to FALSE
#' @param host Max number of records to load. Set to -1 for no limit.
#' @param port Reference version. Defaults to "GRCh37".
#' @param dbname Reference version. Defaults to "GRCh37".
#' @keywords cats
#' @export
#' @examples
#' hgmddata<- getClinvarData('clinvar.vcf',transcript)
getClinvarData <- function(clinvarFile, transcript, useTableData) {
if (useTableData) {
clinvarTableData <- read.table(clinvarFile,
header = T,sep = "\t",comment.char = "")
clinvarGeneData <- subset(clinvarTableData,Assembly=="GRCh37" & GeneSymbol == geneName)
clinvarGeneData <- clinvarGeneData[grep("Benign",clinvarGeneData$ClinicalSignificance,invert = T,ignore.case = T),]
#clinvarGeneData <- clinvarGeneData[grep(clinvarPrefix,clinvarGeneData$Name, fixed=T),]
clinvarMutations <- sub("^NM.*:(c.*)","\\1",clinvarGeneData$HGVS.c..,perl = T)
#v2 <- sub("\\s.*" ,"", v1, perl=T)
clinvarMutations
} else {
packedData<-loadVcfFileForGene(clinvarFile, transcript,n = -1,includeFilteredSites = T)
unpackData(packedData)
}
}
unpackData <- function(packedData) {
init=F
# unpack multiallelic data and form nicely shaped data frame
for (i in 1:nrow(packedData) ) {
# expand multiallelics
unpackedRow<-unpackRow(packedData[i,])
if (!init) {
unpackedData=unpackedRow
init=T
} else unpackedData=rbind(unpackedData,unpackedRow)
}
unpackedData
}
unpackRow <- function(vcfRow) {
#print(vcfRow)
# unpack multiallelics into one row per alt allele
fieldsToUnpack <- names(vcfRow)[sapply(sapply(vcfRow,unlist),length)>1]
df <- as.data.frame(vcfRow)
# build DF with all fields except the ones we need to unpack and convert from list to scalar
altIdx=which(names(vcfRow) %in% fieldsToUnpack)
newRows <- if (length(altIdx)>0) df[,-altIdx] else df
altAlleles<-as.character(vcfRow$ALT[[1]])
allAlleles <- c(newRows$REF,altAlleles)
commonBases <- sapply(altAlleles,function(x,ref) commonSubstring(c(ref,x)), newRows$REF)
for (i in 1:length(altAlleles)) {
newDF<-newRows
# change alt,
newDF$ALT=altAlleles[[i]]
# for all columns which are lists, pick element i
for (fi in fieldsToUnpack) {
valList=df[,fi]
#print(valList)
newDF[,fi]=valList[[1]][i]
}
#newDF <- newDF[0,]
result<- if (i==1) newDF else rbind(result,newDF)
}
result
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.