is_protein_seq <- function(x) {
AA_STANDARD <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I",
"L", "K","M", "F", "P", "S", "T", "W", "Y", "V")
x.vc <- x %>%
as.character() %>%
strsplit(split = "") %>%
unlist()
all(x.vc %in% AA_STANDARD)
}
parse_hash_xml <- function(xml, hash){
df <- xml %>%
XML::xpathSApply(hash, XML::xpathSApply, '@*') %>%
t() %>%
as.data.frame()%>%
# tibble::as_tibble(.name_repair = "unique") %>%
dplyr::distinct()
df_format_hmmer(df)
}
df_format_hmmer <- function(df){
numeric_cols <- c("Z", "Z_setby", "domZ", "domZ_setby", "elapsed" ,
"n_past_bias","n_past_fwd", "n_past_msv", "n_past_vit",
"nhits" ,"nincluded", "nmodels", "nreported", "nseqs",
"page", "sys", "total", "unpacked", "user",
"bias", "evalue", "flags", "hindex", "ndom", "nincluded",
"nregions","nreported", "pvalue", "score","aliId", "aliIdCount",
"aliL","aliM", "aliN", "aliSim", "aliSimCount", "alihindex",
"alihmmfrom","alihmmto","alisqfrom","alisqto" , "bias",
"bitscore", "cevalue", "iali", "ienv", "ievalue","jali", "jenv",
"oasc", "outcompeted", "significant","uniq")
cols <- intersect(numeric_cols,colnames(df))
df %>%
dplyr::mutate(dplyr::across(cols, as.numeric))
}
parse_uuid_xml <- function(xml){
xml %>%
XML::xpathSApply('//data', XML::xpathSApply, '@*') %>%
magrittr::extract("uuid", 1)
}
parse_hits_xml_if_pdb <- function(xml){
hits <- xml %>%
XML::xpathSApply('///hits', add.pdbs) %>%
as.data.frame(stringsAsFactors=FALSE) %>%
magrittr::set_colnames(NULL) %>%
t() %>%
as.data.frame()%>%
# tibble::as_tibble(.name_repair = "unique") %>%
dplyr::distinct()
}
workaround_for_act_site <- function(hmm){
## workaround for 'act_site' tags that can not be parsed
## XML Parsing Error: not well-formed, e.g. for 1h1h_A
## type = hmmscan and pdb = pfam
## https://github.com/cran/bio3d/blob/master/R/hmmer.R
lines <- unlist(strsplit(hmm, "\n"))
actsite.inds <- grep("act_site", lines)
actlen <- length(actsite.inds)
if(actlen>2) {
if(actlen%%2 != 0) {
stop("Bad XML format")
}
rm.inds <- NULL
for(i in seq(1, actlen, 2)) {
rm.inds <- c(rm.inds, seq(actsite.inds[i], actsite.inds[i+1]))
}
hmm <- paste(lines[-rm.inds], collapse="\n")
}
else {
hmm <- paste(lines[-seq(actsite.inds[1], actsite.inds[2])],
collapse="\n")
}
return(hmm)
}
## Add pdbs to hits if db = pdb
## https://github.com/cran/bio3d/blob/master/R/hmmer.R
add.pdbs <- function(x, ...) {
hit <- XML::xpathSApply(x, '@*')
pdbs <- unique(XML::xpathSApply(x, 'pdbs', XML::xmlToList))
new <- as.matrix(hit, ncol=1)
if(length(pdbs) > 1) {
for(i in 1:length(pdbs)) {
hit["acc"]=pdbs[i]
new=cbind(new, hit)
}
colnames(new)=NULL
}
return(new)
}
request_hmmer_using_grid_seqs <- function(x, y, url,
otherwise_list, curl.opts, verbose, fullseqfasta,
seqdb_or_hmmdb, alignment = FALSE){
purrr::map2(x, y,
purrr::possibly(quiet = FALSE, #show errors
otherwise = otherwise_list,
~ {
# Request HMMER
seq <- as.character(.x)
db <- tolower(.y)
seqdb = NULL
hmmdb = NULL
if (seqdb_or_hmmdb == "seqdb") {
seqdb <- db
}
if (seqdb_or_hmmdb == "hmmdb") {
hmmdb <- db
}
hmm <- RCurl::postForm(url, seqdb=seqdb, hmmdb = hmmdb, seq=seq,
style = "POST", .opts = curl.opts,
.contentEncodeFun=RCurl::curlPercentEncode,
.checkParams=TRUE )
## check results from the server
if(!grepl("results", hmm))
stop("Request to HMMER server failed")
if(grepl("act_site", hmm))
hmm <- workaround_for_act_site(hmm)
if(verbose)
message(paste0("Content from HMMER server:\n",
hmm))
## parse xml
xml <- XML::xmlParse(hmm)
## Web url
uuid <- parse_uuid_xml(xml)
result.url <- paste0("http://www.ebi.ac.uk/Tools/hmmer/results/", uuid)
## Obtain data
stats <- xml %>%
parse_hash_xml("///stats")
## Hits
if( db == "pdb"){
hits <- parse_hits_xml_if_pdb(xml)
} else{
hits <- xml %>%
parse_hash_xml("///hits")
}
## Domains
domains <- xml %>%
parse_hash_xml("///domains")
## Create list
rowlist <- list("url" = result.url, "stats" = stats,
"hits" = hits, "domains" = domains)
## Fullfasta
if (fullseqfasta) {
fasta.url <- paste0("https://www.ebi.ac.uk/Tools/hmmer/download/",
uuid, "/score?format=fullfasta")
rowlist <- append(rowlist,
list("fullseqfasta" = fasta.url %>%
Biostrings::readAAStringSet()))
}
if (alignment) {
alignment.url <- paste0("https://www.ebi.ac.uk/Tools/hmmer/download/",
uuid, "/score?format=afa")
rowlist <- append(rowlist,
list("alignment" = alignment.url %>%
Biostrings::readAAMultipleAlignment()))
}
return(rowlist)
}))
}
request_hmmer_using_grid_alns <- function(x, y, url,
otherwise_list,
curl.opts, verbose,
fullseqfasta, alignment){
purrr::map2(x, y,
purrr::possibly(quiet = FALSE, #show errors
otherwise = otherwise_list,
~{
# Request HMMER
aln <- .x
db <- tolower(.y)
hmm <- RCurl::postForm(url, seqdb=db, seq=aln,
style = "POST", .opts = curl.opts,
.contentEncodeFun=RCurl::curlPercentEncode,
.checkParams=TRUE )
## check results from the server
if(!grepl("results", hmm))
stop("Request to HMMER server failed")
if(grepl("act_site", hmm))
hmm <- workaround_for_act_site(hmm)
if(verbose)
message(paste0("Content from HMMER server:\n",
hmm))
## parse xml
xml <- XML::xmlParse(hmm)
## Web url
uuid <- parse_uuid_xml(xml)
result.url <- paste0("http://www.ebi.ac.uk/Tools/hmmer/results/", uuid)
## Obtain data
stats <- xml %>%
parse_hash_xml("///stats")
## Hits
if( db == "pdb"){
hits <- parse_hits_xml_if_pdb(xml)
} else{
hits <- hits <- xml %>%
parse_hash_xml("///hits")
}
## Domains
domains <- xml %>%
parse_hash_xml("///domains")
## Create list
rowlist <- list("url" = result.url, "stats" = stats,
"hits" = hits, "domains" = domains)
## Fullfasta
if (fullseqfasta) {
fasta.url <- paste0("https://www.ebi.ac.uk/Tools/hmmer/download/",
uuid, "/score?format=fullfasta")
rowlist <- append(rowlist,
list("fullseqfasta" = fasta.url %>%
Biostrings::readAAStringSet()))
}
if (alignment) {
alignment.url <- paste0("https://www.ebi.ac.uk/Tools/hmmer/download/",
uuid, "/score?format=afa")
rowlist <- append(rowlist,
list("alignment" = alignment.url %>%
Biostrings::readAAMultipleAlignment()))
}
return(rowlist)
})
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.