#' request_from_ensembl
#'
#' Internal Function to send request to Ensembl REST API
#' @param url Full URL to send to Ensembl
#' @return JSON object with full request
#' @keywords cats
#' @export
#' @import httr
#' @examples
#' seq<- request_from_ensembl(url)
request_from_ensembl <- function(url, tries=0) {
#assign("initial_time", Sys.time(), envir=timeEnv)#' make a URL request to the Ensembl service
initial_time = Sys.time()
# strip any possible whitespace from the url, since imported datasets
# occasionally contain whitespace in the values used to construct the URL.
url = gsub(" ", "", url)
# check that we are not requesting urls fater than that allowed by Ensembl,
# sleep until the period expires
current_time = Sys.time()
diff = 0.05 - as.numeric(current_time - initial_time)
if (diff > 0) { Sys.sleep(diff) }
# set the previous request time to that of the current request
#assign("initial_time", current_time, envir=timeEnv)
# cut out after making 5 attempts to access the url
tries = tries + 1
stopifnot(tries <= 5)
request = httr::GET(url)
# handle the possible http request return status codes, such as when the
# server is unavailable, when we have made too many requests, or requested
# an impossible URL.
if (request$status_code == 503) { # server down
Sys.sleep(30)
return(request_from_ensembl(url, tries))
} else if (request$status_code == 429) { # too frequent requests
reset_time = as.numeric(request$headers$`x-ratelimit-reset`)
Sys.sleep(reset_time)
return(request_from_ensembl(url, tries))
} else if (request$status_code == 400) { # bad url
msg = paste("bad url request: ", url, sep = "")
return(NA)
} else if (request$status_code != 200) { # other status errors
msg = paste("unknown status: ", request$status_code, " at: ", url, sep = "")
stop(msg)
}
return(intToUtf8(request$content))
}
#' getGenomicData
#'
#' Function to get raw genomic data from Ensembl REST API
#' @param tag ENST tag to get sequence for
#' @return list with elements startcoord, endCoord and seq=sequence
#' @keywords cats
#' @export
#' @examples
#' seq<- getGenomicData('ENST00000343536')
getGenomicData <- function(tag) {
# construct the VEP REST url for the variant
base_url = "http://grch37.rest.ensembl.org/"
ext = "sequence/id/"
ext = paste(ext, tag, sep="")
url = paste(base_url, ext,"?type=genomic")
# get the VEP data for the variant
request = request_from_ensembl(url)
#print (request)
result <- rjson::fromJSON(request)
desc <- strsplit(result$desc,":")[[1]]
seq <- result$seq
startCoord <- desc[4]
stopCoord <- desc[5]
return(list(startCoord=startCoord,stopCoord=stopCoord,seq=seq))
}
#' getCDNASequence
#'
#' Function to get raw cDNA (CDS) from Ensembl REST API
#' @param tag ENST tag to get sequence for
#' @return List containing raw reply
#' @keywords cats
#' @export
#' @examples
#' seq<- getCDNASequence('ENST00000343536')
getCDNASequence <- function(tag) {
# construct the VEP REST url for the variant
base_url = "http://grch37.rest.ensembl.org/"
ext = "sequence/id/"
ext = paste(ext, tag, sep="")
url = paste(base_url, ext,"?type=cds")
# get the VEP data for the variant
request = request_from_ensembl(url)
#print (request)
result <- rjson::fromJSON(request)$seq
result
}
#' getVEPFromHGVS
#'
#' Function to get raw cDNA (CDS) from Ensembl REST API
#' @param hgvsTag Character string with mutation HGVS Code
#' @param tag Character string with ENST tag to get sequence for
#' @param idIncludedInTag Does hgvsTag have form ENSTxxx:c.156T>A or just the hgvs code?
#' @param args Extra args to pass to VEP API request.
#' @param includeOnlyCanonicalTranscript Only output canonical transcript data. default to FALSE
#' @param outputDataFrame Flag to indicate whether data frame or list is output. Defaults to FALSE
#' @param verbose Output verbose data for debugging
#' @return Request from VEP API
#' @keywords cats
#' @export
#' @examples
#' seq <- getVEPFromHGVS('c.1559delT','ENST00000343536')
getVEPFromHGVS <- function(hgvsTag, transcriptID, idIncludedInTag=F,
args="canonical=1", includeOnlyCanonicalTranscript=F,
outputDataFrame=F, verbose=F) {
if (idIncludedInTag) tag=as.character(hgvsTag) else tag=paste0(transcriptID,":",as.character(hgvsTag))
if (verbose) print(paste0("Requesting VEP API:", tag))
url = sprintf('http://grch37.rest.ensembl.org/vep/human/hgvs/%s?%s',tag,args)
request = request_from_ensembl(url)
if (is.na(request)) {
warning(paste0("Bad request:",url))
return(NA)
} else {
r <- rjson::fromJSON(request)
if (length(r)==0) {
warning(paste0("Empty reply received:",url))
return(NA)
}
reply <- r[[1]]
if (!('transcript_consequences' %in% names(reply) )) {
warning(paste0("Incomplete reply received:",url))
return(NA)
}
# flatten lists to make it easier to parse downsstream if required
if (includeOnlyCanonicalTranscript) {
tc<- (Filter(function(x) x$transcript_id==transcriptID, reply$transcript_consequences)) # only keep annotations for given transcript ID
reply$transcript_consequences<-tc[[1]]
}
if (outputDataFrame) reply <- annovarFormatFromVEP(reply)
return(reply)
}
}
annovarFormatFromVEP <- function(vepReply) {
chrom = vepReply$seq_region_name
pstart = vepReply$start
pstop = vepReply$end
if (pstop < pstart) {
pstart = pstop
}
alstr <- vepReply$allele_string
a<-strsplit(x = alstr,split="/")
ref <- a[[1]][1]
alt <- a[[1]][2]
if (vepReply$transcript_consequences$strand == -1 ) {
ref = as.character(reverseComplement(DNAString((ref))))
alt = as.character(reverseComplement(DNAString((alt))))
}
outdata <- data.frame(Chrom=as.character(vepReply$seq_region_name),
Start=as.numeric(pstart),
Stop=as.numeric(pstop),
Reference=as.character(ref),
Alternate=as.character(alt))
outdata <- cbind(outdata,data.frame(t(unlist(vepReply))))
}
#' getCanonicalTranscript
#'
#' Function to get transcript object for a given gene from Ensembl REST API
#' @param tag ENST tag to get sequence for
#' @return List containing only transcript elements. Most important field: transcript$id gives corresponding ENST tag
#' @keywords cats
#' @export
#' @examples
#' transcript <- getCanonicalTranscript('CFTR')
getCanonicalTranscript <- function(geneName) {
url = sprintf('http://rest.ensembl.org/xrefs/symbol/homo_sapiens/%s?',geneName)
request = request_from_ensembl(url)
if (is.na(request)) {
warning(paste0("Bad request:",url))
return(NA)
} else {
request = rjson::fromJSON(request)[[1]]
if (request$type == "gene") {
geneID <- request$id
url = sprintf('http://grch37.rest.ensembl.org/lookup/id/%s?expand=1',geneID)
#print(url)
# get the VEP data for the variant
request = request_from_ensembl(url)
if (is.na(request)) {
warning(paste0("Bad request:",url))
return(NA)
} else {
request = rjson::fromJSON(request)
canonicalTranscript <- Filter(function(x) x$is_canonical>0, request$Transcript)[[1]]
return(canonicalTranscript)
}
return(request)
} else {
warning(paste0("Unexpected object:",request))
return(NA)
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.