#' produceSequenceFromHGVSCode
#'
#' Given an HGVS mutation code, produce corresponding cDNA and amino-acid sequence,
#' starting from the ATG initiator codon and up to STOP codon. If mutation clobbers the STOP codon,
#' sequence will be produced until cDNA string runs out.
#' @param hgvs Character string with HGVS mutation code
#' @param transcriptID Character string with ENST ID to which hgvs code corresponds
#' @param wildTypeCDNA Character string with WT reference cDNA sequence
#' @param lowerCaseChange Highlight change in lowercase
#' @param debug Output debug info
#' @return list with elements cdna and aa representing new cDNA and amino acid mutated sequence
#' @import Biostrings
#' @keywords cats
#' @export
#' @examples
#' str <- produceSequenceFromHGVSCode('c.571G>C','ENST00000374840',wtCDNA)
produceSequenceFromHGVSCode <- function(hgvs, transcriptID, wildTypeCDNA, lowerCaseChange=F,debug=F) {
# call VEP to get: start/stop positions and substitutions. Trivial for SNV's but makes life easier
# for indels or complex substitutions
vepData <- getVEPFromHGVS(hgvs,transcriptID,verbose=T,includeOnlyCanonicalTranscript=T)
offset = vepData$transcript_consequences$cds_start
alstr <- vepData$allele_string
a<-strsplit(x = alstr,split="/")
ref <- a[[1]][1]
alt <- a[[1]][2]
if (alt=="-") alt=""
if (ref=="-") {
ref=""
offset <- offset+1
}
#eventLength <- vepData$end-vepData$start # produces 0 for snps, delLength-1 for deletions,insLength for insertions,
eventLength <- nchar(alt)-nchar(ref)
if (lowerCaseChange) alt<-tolower(alt)
# sanity check: ref must match cDNA string content
if (debug) {
print(offset)
print(ref)
print(alt)
print(eventLength)
}
newSequence <-paste0(substr(wildTypeCDNA, start=1, stop=offset-1),
alt,
substr(wildTypeCDNA, start=offset+nchar(ref), stop=nchar(wildTypeCDNA)))
aaSequence <- translate(DNAString(newSequence))
data.frame(hgvs=hgvs,cdna=as.character(newSequence),aa=as.character(aaSequence))
}
#' createCDNASequences
#'
#' Given an HGVS mutation code, produce corresponding cDNA and amino-acid sequence,
#' starting from the ATG initiator codon and up to STOP codon.
#' @param mutationList Character string vector with HGVS mutation codes to produce
#' @param transcriptID Character string with ENST ID to which hgvs code corresponds
#' @param lowerCaseChange Boolean - Highlight change in lowercase
#' @return Data frame with fields cdna and aa representing new cDNA and amino acid mutated sequence
#' @keywords cats
#' @export
#' @examples
#' str <- createCDNASequences(c('c.571G>C','c.1559delT'),'ENST00000374840')
createCDNASequences <- function(mutationList, transcriptID,lowerCaseChange=F) {
wtCDNA <- getCDNASequence(transcriptID)
res <- lapply(mutationList,produceSequenceFromHGVSCode,transcriptID,wtCDNA,lowerCaseChange)
do.call("rbind",res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.