Nothing
#' Name UniProt search hits elements
#'
#' Every search hit in uniprot has a character element within it, named
#' "accession". this function should be used as the second response
#' parser in the *_search functions to set the name each search hit to
#' it's accession
#'
#' @param x object to be parsed
#'
#' @return a list with the same structure of the input, only named.
#' @noRd
.rba_uniprot_search_namer <- function(x) {
x_names <- try(expr = vapply(X = x,
FUN = function(x) {
x$accession
},
FUN.VALUE = character(1)),
silent = TRUE)
if (length(x) == length(x_names)) {
names(x) <- x_names
}
return(x)
}
#### Proteins Endpoints ####
#' Search UniProt entries
#'
#' Using this function, you can search and retrieve UniProt Knowledge-base
#' (UniProtKB) protein entries using variety of options. You may also
#' refine your search with modifiers such as sequence length, review status
#' etc. See "Arguments" section" for more information.
#'
#' Note that this is a search function. Thus, you are not required to fill
#' every argument; You may use whatever combinations of arguments you see
#' fit for your query.s
#' \cr UniProt Entries are grouped in two sections:\enumerate{
#' \item Reviewed(Swiss-Prot): Manually annotated records with information
#' extracted from literature and curator-evaluated computational analysis.
#' \item Unreviewed (TrEMBL): Computationally analyzed records that await
#' full manual annotation.}
#'
#' @section Corresponding API Resources:
#' "GET https://www.ebi.ac.uk/proteins/api/proteins"
#'
#' @param accession \href{https://www.uniprot.org/help/accession_numbers}{
#' UniProtKB primary or secondary accession}(s). You can supply up to 100
#' accession numbers.
#' @param reviewed Logical: If TRUE, only return
#' "UniProtKB/Swiss-Prot" (reviewed) entries; If FALSE, only return TrEMBL
#' (un-reviewed) entries.
#' @param isoform Numeric: you have three options:\itemize{
#' \item 0: Exclude isoforms.
#' \item 1: Return isoforms only.
#' \item 2: Return both.}
#' see: \href{https://www.uniprot.org/help/alternative_products}{Alternative
#' products}
#' @param go_term Limit the search to entries associated with your supplied GO
#' (\href{https://www.uniprot.org/help/gene_ontology}{Gene Ontology}) term.
#' You can supply Either GO ID or a character string -partially or fully-
#' matching the term. e.g. "GO:0001776" or "leukocyte homeostasis". if You
#' supply "leukocyte", any term containing that word will be included,
#' e.g "leukocyte chemotaxis", "leukocyte activation".
#' @param keyword Limit the search to entries that contain your supplied
#' keyword. see: \href{https://www.uniprot.org/keywords/}{UniProt Keywords}
#' @param ec \href{https://enzyme.expasy.org/}{EC (Enzyme Commission) number(s)}.
#' You can supply up to 20 EC numbers.
#' @param gene \href{https://www.uniprot.org/help/gene_name}{UniProt gene
#' name(s)}. You can supply up to 20 gene names. e.g. if you supply
#' "CD40", "CD40 ligand" will also be included.
#' @param exact_gene \href{https://www.uniprot.org/help/gene_name}{UniProt
#' exact gene name(s)}. You can supply up to 20 exact gene names. e.g.
#' if you supply "CD40", "CD40 ligand" will not be included in the results.
#' @param protein \href{https://www.uniprot.org/help/protein_names}{UniProt
#' protein name}
#' @param organism \href{https://www.uniprot.org/taxonomy/}{Organism name}.
#' @param taxid NIH-NCBI \href{https://www.uniprot.org/taxonomy/}{Taxon ID}.
#' You can supply up to 20 taxon IDs.
#' @param pubmed Entries which \href{https://www.uniprot.org/citations/}{cite
#' to} the article with your supplied PubMed ID.
#' @param seq_length An exact sequence length (e.g. 150) or a range of sequence
#' lengths (e.g. "130-158").
#' @param md5 Sequence md5 value.
#' @param ... rbioapi option(s). See \code{\link{rba_options}}'s
#' arguments manual for more information on available options.
#'
#' @return A List where each element corresponds to one UniProt entity returned
#' by your search query. The element itself is a sub-list containing all
#' information that UniProt has about that entity.
#'
#' @references \itemize{
#' \item The UniProt Consortium , UniProt: the Universal Protein
#' Knowledgebase in 2025, Nucleic Acids Research, 2024;, gkae1010,
#' https://doi.org/10.1093/nar/gkae1010
#' \item Andrew Nightingale, Ricardo Antunes, Emanuele Alpi, Borisas
#' Bursteinas, Leonardo Gonzales, Wudong Liu, Jie Luo, Guoying Qi, Edd
#' Turner, Maria Martin, The Proteins API: accessing key integrated protein
#' and genome information, Nucleic Acids Research, Volume 45, Issue W1,
#' 3 July 2017, Pages W539–W544, https://doi.org/10.1093/nar/gkx237
#' \item \href{https://www.ebi.ac.uk/proteins/api/doc/}{Proteins API
#' Documentation}
#' \item \href{https://www.uniprot.org/help/publications}{Citations note
#' on UniProt website}
#' }
#'
#' @examples
#' \donttest{
#' rba_uniprot_proteins_search(accession = "Q99616")
#' }
#' \donttest{
#' rba_uniprot_proteins_search(gene = "cd40")
#' }
#' \donttest{
#' rba_uniprot_proteins_search(gene = "cd40 ligand")
#' }
#' \donttest{
#' rba_uniprot_proteins_search(gene = "cd40", reviewed = TRUE)
#' }
#' \donttest{
#' rba_uniprot_proteins_search(gene = "cd40", reviewed = TRUE, isoform = 1)
#' }
#' \donttest{
#' rba_uniprot_proteins_search(keyword = "Inhibition of host chemokines by virus")
#' }
#' \donttest{
#' rba_uniprot_proteins_search(keyword = "chemokines")
#' }
#'
#' @family "UniProt - Proteins"
#' @export
rba_uniprot_proteins_search <- function(accession = NULL,
reviewed = NULL,
isoform = NULL,
go_term = NULL,
keyword = NULL,
ec = NULL,
gene = NULL,
exact_gene = NULL,
protein = NULL,
organism = NULL,
taxid = NULL,
pubmed = NULL,
seq_length = NULL,
md5 = NULL,
...) {
## Load Global Options
.rba_ext_args(...)
## Check User-input Arguments
.rba_args(
cons = list(
list(arg = "accession", class = "character", max_len = 100),
list(arg = "reviewed", class = "logical"),
list(arg = "isoform", class = "numeric", val = c(0, 1, 2)),
list(arg = "go_term", class = "character"),
list(arg = "keyword", class = "character"),
list(arg = "ec", class = "character", max_len = 20),
list(arg = "gene", class = "character", max_len = 20),
list(arg = "exact_gene", class = "character", max_len = 20),
list(arg = "protein", class = "character"),
list(arg = "organism", class = "character"),
list(arg = "taxid", class = "numeric", max_len = 20),
list(arg = "pubmed", class = "character", max_len = 20),
list(arg = "seq_length", class = c("numeric", "character")),
list(arg = "md5", class = "character")
)
)
.msg(
"Searching UniProt and retrieving proteins that match your supplied inputs."
)
## Build GET API Request's query
call_query <- .rba_query(
init = list("size" = "-1"),
list("accession", !is.null(accession), paste0(accession, collapse = ",")),
list("reviewed", !is.null(reviewed), ifelse(reviewed, "true", "false")),
list("isoform", !is.null(isoform), isoform),
list("goterms", !is.null(go_term), go_term),
list("keywords", !is.null(keyword), keyword),
list("ec", !is.null(ec), paste0(ec, collapse = ",")),
list("gene", !is.null(gene), paste0(gene, collapse = ",")),
list("exact_gene", !is.null(exact_gene), paste0(exact_gene, collapse = ",")),
list("protein", !is.null(protein), protein),
list("organism", !is.null(organism), organism),
list("taxid", !is.null(taxid), paste0(taxid, collapse = ",")),
list("pubmed", !is.null(pubmed), paste0(pubmed, collapse = ",")),
list("seq_length", !is.null(seq_length), seq_length),
list("md5", !is.null(md5), md5)
)
## Build Function-Specific Call
parser_input <- list("json->list", .rba_uniprot_search_namer)
input_call <- .rba_httr(
httr = "get",
url = .rba_stg("uniprot", "url"),
path = paste0(.rba_stg("uniprot", "pth"), "proteins"),
query = call_query,
accept = "application/json",
parser = parser_input,
save_to = .rba_file("uniprot_proteins_search.json")
)
## Call API
final_output <- .rba_skeleton(input_call)
return(final_output)
}
#' Get UniProt entry by accession
#'
#' Use this function to retrieve a UniProt Entry by it's UniProt accession.
#' You can also use "isoform" or "interaction" arguments to retrieve
#' isoforms or interactor proteins of that entry. Note that in one function
#' call you can only set none or only one of "isoform" or "interaction" as
#' TRUE, not both of them.
#'
#' @section Corresponding API Resources:
#' "GET https://ebi.ac.uk/proteins/api/proteins/\{accession\}"
#' \cr "GET https://ebi.ac.uk/proteins/api/proteins/interaction/\{accession\}"
#' \cr "GET https://ebi.ac.uk/proteins/api/proteins/\{accession\}/isoforms"
#'
#' @param accession \href{https://www.uniprot.org/help/accession_numbers}{
#' UniProtKB primary or secondary accession}.
#' @param interaction Logical: (default = FALSE) Only retrieve
#' \href{https://www.uniprot.org/help/interaction_section}{interaction}
#' information of your supplied UniProt entity?
#' @param isoforms Logical: (default = FALSE) Only retrieve
#' \href{https://www.uniprot.org/help/alternative_products}{isoforms} of your
#' supplied UniProt entity?
#' @param ... rbioapi option(s). See \code{\link{rba_options}}'s
#' arguments manual for more information on available options.
#'
#' @return A list that contains UniProt protein informations with your
#' supplied accession.
#'
#' @references \itemize{
#' \item The UniProt Consortium , UniProt: the Universal Protein
#' Knowledgebase in 2025, Nucleic Acids Research, 2024;, gkae1010,
#' https://doi.org/10.1093/nar/gkae1010
#' \item Andrew Nightingale, Ricardo Antunes, Emanuele Alpi, Borisas
#' Bursteinas, Leonardo Gonzales, Wudong Liu, Jie Luo, Guoying Qi, Edd
#' Turner, Maria Martin, The Proteins API: accessing key integrated protein
#' and genome information, Nucleic Acids Research, Volume 45, Issue W1,
#' 3 July 2017, Pages W539–W544, https://doi.org/10.1093/nar/gkx237
#' \item \href{https://www.ebi.ac.uk/proteins/api/doc/}{Proteins API
#' Documentation}
#' \item \href{https://www.uniprot.org/help/publications}{Citations note
#' on UniProt website}
#' }
#'
#' @examples
#' \donttest{
#' rba_uniprot_proteins(accession = "P01730")
#' }
#' \donttest{
#' rba_uniprot_proteins(accession = "P01730", interaction = TRUE)
#' }
#' \donttest{
#' rba_uniprot_proteins(accession = "Q29983", isoforms = TRUE)
#' }
#'
#' @family "UniProt - Proteins"
#' @export
rba_uniprot_proteins <- function(accession,
interaction = FALSE,
isoforms = FALSE,
...) {
## Load Global Options
.rba_ext_args(...)
## Check User-input Arguments
.rba_args(
cons = list(
list(arg = "accession", class = "character"),
list(arg = "interaction", class = "logical"),
list(arg = "isoforms", class = "logical")
),
cond = list(
list(
quote(sum(interaction, isoforms) == 2),
"You can only set only one of interaction or isoform as TRUE in one function call.")
)
)
.msg(
"Retrieving %sUniProt Entity with accession number %s.",
if (isTRUE(interaction)) {
"Interactions of "
} else if (isTRUE(isoforms)) {
"isoforms of "
} else {
""},
accession
)
## Build Function-Specific Call
path_input <- sprintf(
"%s%s/%s",
.rba_stg("uniprot", "pth"),
ifelse(isTRUE(interaction), yes = "proteins/interaction", no = "proteins"),
accession)
if (isTRUE(isoforms)) {
path_input <- paste0(path_input, "/isoforms")
}
parser_input <- ifelse(
isTRUE(interaction) | isTRUE(isoforms),
yes = "json->list",
no = "json->list_simp"
)
input_call <- .rba_httr(
httr = "get",
url = .rba_stg("uniprot", "url"),
path = path_input,
accept = "application/json",
parser = parser_input,
save_to = .rba_file("uniprot_proteins.json")
)
## Call API
final_output <- .rba_skeleton(input_call)
return(final_output)
}
#' Get UniProt Entry by UniProt Cross-Reference Database and ID
#'
#' \href{https://www.uniprot.org/database/}{UniProt Cross-Reference} links
#' protein Entities with cross-reference (external) databases. Using this
#' function, you can retrieve a UniProt entity using external database name
#' and protein ID in that database.
#'
#' @section Corresponding API Resources:
#' "GET https://www.ebi.ac.uk/proteins/api/proteins/\{dbtype\}:\{dbid\}"
#'
#' @param db_id The protein ID in the cross-reference (external) database.
#' @param db_name \href{https://www.uniprot.org/database/}{cross-reference}
#' (external database) name.
#' @param reviewed Logical: (Optional) If TRUE, only returns
#' "UniProtKB/Swiss-Prot" (reviewed) entries; If FALSE, only returns TrEMBL
#' (un-reviewed) entries.
#' @param isoform Numeric: (Optional) you have two options:\itemize{
#' \item 0: Exclude isoforms.
#' \item 1: Return isoforms only.}
#' see: \href{https://www.uniprot.org/help/alternative_products}{Alternative
#' products}
#' @param ... rbioapi option(s). See \code{\link{rba_options}}'s
#' arguments manual for more information on available options.
#'
#' @return List which each element is a UniProt entity that correspond to
#' your supplied cross-reference database name and ID.
#'
#' @references \itemize{
#' \item The UniProt Consortium , UniProt: the Universal Protein
#' Knowledgebase in 2025, Nucleic Acids Research, 2024;, gkae1010,
#' https://doi.org/10.1093/nar/gkae1010
#' \item Andrew Nightingale, Ricardo Antunes, Emanuele Alpi, Borisas
#' Bursteinas, Leonardo Gonzales, Wudong Liu, Jie Luo, Guoying Qi, Edd
#' Turner, Maria Martin, The Proteins API: accessing key integrated protein
#' and genome information, Nucleic Acids Research, Volume 45, Issue W1,
#' 3 July 2017, Pages W539–W544, https://doi.org/10.1093/nar/gkx237
#' \item \href{https://www.ebi.ac.uk/proteins/api/doc/}{Proteins API
#' Documentation}
#' \item \href{https://www.uniprot.org/help/publications}{Citations note
#' on UniProt website}
#' }
#'
#' @examples
#' \donttest{
#' rba_uniprot_proteins_crossref("cd40", "hgnc")
#' }
#' \donttest{
#' rba_uniprot_proteins_crossref("cd40", "hgnc", reviewed = TRUE)
#' }
#' \donttest{
#' rba_uniprot_proteins_crossref("mica", "hgnc", isoform = 0)
#' }
#'
#' @family "UniProt - Proteins"
#' @export
rba_uniprot_proteins_crossref <- function(db_id,
db_name,
reviewed = NULL,
isoform = NULL,
...) {
## Load Global Options
.rba_ext_args(...)
## Check User-input Arguments
.rba_args(
cons = list(
list(arg = "db_name",
class = "character"),
list(arg = "db_id",
class = "character"),
list(arg = "reviewed",
class = "logical"),
list(arg = "isoform",
class = "numeric",
val = c(0, 1, 2))
)
)
.msg(
"Retrieving UniProt entities that correspond to ID %s in database %s.",
db_id, db_name
)
## Build GET API Request's query
call_query <- .rba_query(
init = list("size" = "-1"),
list("reviewed", !is.null(reviewed), ifelse(reviewed, "true", "false")),
list("isoform", !is.null(isoform), isoform)
)
## Build Function-Specific Call
input_call <- .rba_httr(
httr = "get",
url = .rba_stg("uniprot", "url"),
path = sprintf("%sproteins/%s:%s", .rba_stg("uniprot", "pth"), db_name, db_id),
query = call_query,
accept = "application/json",
parser = "json->list",
save_to = .rba_file("uniprot_proteins_crossref.json")
)
## Call API
final_output <- .rba_skeleton(input_call)
return(final_output)
}
#### Features Endpoints ####
#' Search UniProt protein sequence features
#'
#' UniProt maintains \href{https://www.uniprot.org/help/sequence_annotation}{
#' sequence annotations (features)} that describe regions
#' in the protein sequence. Using this function, you can search and
#' retrieve UniProt proteins' sequence annotations (features). you may also
#' refine your search query with variety of modifiers.
#'
#' Note that this is a search function. Thus, you are not required to fill
#' every argument; You may use whatever combinations of arguments you see
#' fit for your query.
#' \cr UniProt Entries are grouped in two sections:\enumerate{
#' \item Reviewed(Swiss-Prot): Manually annotated records with information
#' extracted from literature and curator-evaluated computational analysis.
#' \item Unreviewed (TrEMBL): Computationally analyzed records that await
#' full manual annotation.}
#'
#' @section Corresponding API Resources:
#' "GET https://www.ebi.ac.uk/proteins/api/features"
#'
#' @param accession \href{https://www.uniprot.org/help/accession_numbers}{
#' UniProtKB primary or secondary accession}(s). You can supply up to 100
#' accession numbers.
#' @param gene \href{https://www.uniprot.org/help/gene_name}{UniProt gene
#' name(s)}. You can supply up to 20 gene names. e.g. if you supply
#' "CD40", "CD40 ligand" will also be included.
#' @param exact_gene \href{https://www.uniprot.org/help/gene_name}{UniProt
#' exact gene name(s)}. You can supply up to 20 exact gene names. e.g.
#' if you supply "CD40", "CD40 ligand" will not be included in the results.
#' @param protein \href{https://www.uniprot.org/help/protein_names}{UniProt
#' protein name}
#' @param reviewed Logical: If TRUE, only return
#' "UniProtKB/Swiss-Prot" (reviewed) entries; If FALSE, only return TrEMBL
#' (un-reviewed) entries.
#' @param organism \href{https://www.uniprot.org/taxonomy/}{Organism name}.
#' @param taxid NIH-NCBI \href{https://www.uniprot.org/taxonomy/}{Taxon ID}.
#' You can supply up to 20 taxon IDs.
#' @param categories \href{https://www.uniprot.org/help/sequence_annotation}{
#' Sequence annotation (Features)} categories (subsection). accepted values
#' are: "MOLECULE_PROCESSING", "TOPOLOGY", "SEQUENCE_INFORMATION",
#' "STRUCTURAL", "DOMAINS_AND_SITES", "PTM", "VARIANTS" and/or "MUTAGENESIS".
#' You can supply up to 8 categories.
#' @param types \href{https://www.uniprot.org/help/sequence_annotation}{
#' Sequence annotation (Features)} types. accepted values
#' are: "INIT_MET", "SIGNAL", "PROPEP", "TRANSIT", "CHAIN", "PEPTIDE",
#' "TOPO_DOM", "TRANSMEM", "DOMAIN", "REPEAT", "CA_BIND", "ZN_FING",
#' "DNA_BIND", "NP_BIND", "REGION", "COILED", "MOTIF", "COMPBIAS",
#' "ACT_SITE", "METAL", "BINDING", "SITE", "NON_STD", "MOD_RES", "LIPID",
#' "CARBOHYD", "DISULFID", "CROSSLNK", "VAR_SEQ", "VARIANT", "MUTAGEN",
#' "UNSURE", "CONFLICT", "NON_CONS", "NON_TER", "HELIX", "TURN", "STRAND"
#' and/or "INTRAMEM". You can supply up to 20 types.
#' @param ... rbioapi option(s). See \code{\link{rba_options}}'s
#' arguments manual for more information on available options.
#'
#' @return List where each element corresponds to one UniProt entity returned
#' by your search query. The element itself is a sub-list containing all
#' information that UniProt has about that entity.
#'
#' @references \itemize{
#' \item The UniProt Consortium , UniProt: the Universal Protein
#' Knowledgebase in 2025, Nucleic Acids Research, 2024;, gkae1010,
#' https://doi.org/10.1093/nar/gkae1010
#' \item Andrew Nightingale, Ricardo Antunes, Emanuele Alpi, Borisas
#' Bursteinas, Leonardo Gonzales, Wudong Liu, Jie Luo, Guoying Qi, Edd
#' Turner, Maria Martin, The Proteins API: accessing key integrated protein
#' and genome information, Nucleic Acids Research, Volume 45, Issue W1,
#' 3 July 2017, Pages W539–W544, https://doi.org/10.1093/nar/gkx237
#' \item \href{https://www.ebi.ac.uk/proteins/api/doc/}{Proteins API
#' Documentation}
#' \item \href{https://www.uniprot.org/help/publications}{Citations note
#' on UniProt website}
#' }
#'
#' @examples
#' \donttest{
#' rba_uniprot_features_search(accession = "Q99616")
#' }
#' \donttest{
#' rba_uniprot_features_search(gene = "cd40")
#' }
#' \donttest{
#' rba_uniprot_features_search(gene = "cd40 ligand")
#' }
#' \donttest{
#' rba_uniprot_features_search(gene = "cd40", reviewed = TRUE)
#' }
#' \donttest{
#' rba_uniprot_features_search(accession = "Q99616",
#' categories = c("MOLECULE_PROCESSING", "TOPOLOGY"))
#' }
#' \donttest{
#' rba_uniprot_features_search(accession = "Q99616", types = "DISULFID")
#' }
#'
#' @family "UniProt - Features"
#' @export
rba_uniprot_features_search <- function(accession = NULL,
gene = NULL,
exact_gene = NULL,
protein = NULL,
reviewed = NULL,
organism = NULL,
taxid = NULL,
categories = NULL,
types = NULL,
...) {
## Load Global Options
.rba_ext_args(...)
## Check User-input Arguments
.rba_args(
cons = list(
list(arg = "accession", class = "character", max_len = 100),
list(arg = "gene", class = "character", max_len = 20),
list(arg = "exact_gene", class = "character", max_len = 20),
list(arg = "protein", class = "character"),
list(arg = "reviewed", class = "logical"),
list(arg = "organism", class = "character"),
list(arg = "taxid", class = "numeric", max_len = 20),
list(
arg = "categories", class = "character", max_len = 8,
val = c("MOLECULE_PROCESSING",
"TOPOLOGY",
"SEQUENCE_INFORMATION",
"STRUCTURAL",
"DOMAINS_AND_SITES",
"PTM",
"VARIANTS",
"MUTAGENESIS")
),
list(
arg = "types", class = "character", max_len = 20,
val = c("INIT_MET",
"SIGNAL",
"PROPEP",
"TRANSIT",
"CHAIN",
"PEPTIDE",
"TOPO_DOM",
"TRANSMEM",
"DOMAIN",
"REPEAT",
"CA_BIND",
"ZN_FING",
"DNA_BIND",
"NP_BIND",
"REGION",
"COILED",
"MOTIF",
"COMPBIAS",
"ACT_SITE",
"METAL",
"BINDING",
"SITE",
"NON_STD",
"MOD_RES",
"LIPID",
"CARBOHYD",
"DISULFID",
"CROSSLNK",
"VAR_SEQ",
"VARIANT",
"MUTAGEN",
"UNSURE",
"CONFLICT",
"NON_CONS",
"NON_TER",
"HELIX",
"TURN",
"STRAND",
"INTRAMEM")
)
)
)
.msg(
"Searching UniProt and retrieving sequence annotations (features) of proteins that match your supplied inputs."
)
## Build GET API Request's query
call_query <- .rba_query(
init = list("size" = "-1"),
list("accession", !is.null(accession), paste0(accession, collapse = ",")),
list("gene", !is.null(gene), paste0(gene, collapse = ",")),
list("exact_gene", !is.null(exact_gene), paste0(exact_gene, collapse = ",")),
list("protein", !is.null(protein), protein),
list("reviewed", !is.null(reviewed), ifelse(reviewed, "true", "false")),
list("organism", !is.null(organism), organism),
list("taxid", !is.null(taxid), paste0(taxid, collapse = ",")),
list("categories", !is.null(categories), paste0(categories, collapse = ",")),
list("types", !is.null(types), paste0(types, collapse = ","))
)
## Build Function-Specific Call
parser_input <- list("json->list", .rba_uniprot_search_namer)
input_call <- .rba_httr(
httr = "get",
url = .rba_stg("uniprot", "url"),
path = paste0(.rba_stg("uniprot", "pth"), "features"),
query = call_query,
accept = "application/json",
parser = parser_input,
save_to = .rba_file("uniprot_features_search.json")
)
## Call API
final_output <- .rba_skeleton(input_call)
return(final_output)
}
# #' Search protein sequence features of a given type in UniProt
# #'
# #' Search for term(s) that appear in feature description for your specified
# #' feature type. For example, you can search by type=DOMAIN and
# #' Term=Kinase. Comma separated values
# #'
# #' @section Corresponding API Resources:
# #' "GET https://www.ebi.ac.uk/proteins/api"
# #'
# #' @param terms
# #' @param type
# #' @param categories
# #' @param ... rbioapi option(s). See \code{\link{rba_options}}'s
# #' arguments manual for more information on available options.
# #'
# #' @return
# #'
# #' @references \itemize{
# #' \item The UniProt Consortium , UniProt: the Universal Protein
# #' Knowledgebase in 2025, Nucleic Acids Research, 2024;, gkae1010,
# #' https://doi.org/10.1093/nar/gkae1010
# #' \item Andrew Nightingale, Ricardo Antunes, Emanuele Alpi, Borisas
# #' Bursteinas, Leonardo Gonzales, Wudong Liu, Jie Luo, Guoying Qi, Edd
# #' Turner, Maria Martin, The Proteins API: accessing key integrated protein
# #' and genome information, Nucleic Acids Research, Volume 45, Issue W1,
# #' 3 July 2017, Pages W539–W544, https://doi.org/10.1093/nar/gkx237
# #' \item \href{https://www.ebi.ac.uk/proteins/api/doc/}{Proteins API
# #' Documentation}
# #' \item \href{https://www.uniprot.org/help/publications}{Citations note
# #' on UniProt website}
# #' }
# #'
# # #' @examples
# #'
# #' @family "UniProt - Features"
# #' @export
# rba_uniprot_features_type <- function(terms,
# type,
# categories = NULL,
# ...) {
# ## Load Global Options
# .rba_ext_args(...)
#
# ## Check User-input Arguments
# .rba_args(
# cons = list(
# list(arg = "terms", class = "character", max_len = 20),
# list(
# arg = "type", class = "character", len = 1,
# val = c("INIT_MET",
# "SIGNAL",
# "PROPEP",
# "TRANSIT",
# "CHAIN",
# "PEPTIDE",
# "TOPO_DOM",
# "TRANSMEM",
# "DOMAIN",
# "REPEAT",
# "CA_BIND",
# "ZN_FING",
# "DNA_BIND",
# "NP_BIND",
# "REGION",
# "COILED",
# "MOTIF",
# "COMPBIAS",
# "ACT_SITE",
# "METAL",
# "BINDING",
# "SITE",
# "NON_STD",
# "MOD_RES",
# "LIPID",
# "CARBOHYD",
# "DISULFID",
# "CROSSLNK",
# "VAR_SEQ",
# "VARIANT",
# "MUTAGEN",
# "UNSURE",
# "CONFLICT",
# "NON_CONS",
# "NON_TER",
# "HELIX",
# "TURN",
# "STRAND",
# "INTRAMEM")
# ),
# list(
# arg = "categories", class = "character", max_len = 8,
# val = c("MOLECULE_PROCESSING",
# "TOPOLOGY",
# "SEQUENCE_INFORMATION",
# "STRUCTURAL",
# "DOMAINS_AND_SITES",
# "PTM",
# "VARIANTS",
# "MUTAGENESIS.")
# )
# )
# )
#
# .msg(
# "get /features/type/{type} Search protein sequence features of a given type in UniProt"
# )
#
# ## Build GET API Request's query
# call_query <- .rba_query(
# init = list("size" = "-1"),
# list("categories", !is.null(categories), paste0(categories, collapse = ",")),
# list("terms", !is.null(terms), paste0(terms, collapse = ","))
# )
#
# ## Build Function-Specific Call
# input_call <- .rba_httr(
# httr = "get",
# url = .rba_stg("uniprot", "url"),
# path = paste0(.rba_stg("uniprot", "pth"), "features/type/", type),
# query = call_query,
# accept = "application/json",
# parser = "json->list",
# save_to = .rba_file("uniprot_features_type.json")
# )
#
# ## Call API
# final_output <- .rba_skeleton(input_call)
# return(final_output)
# }
#' Get UniProt protein sequence features by accession
#'
#' Use this function to retrieve
#' \href{https://www.uniprot.org/help/sequence_annotation}{sequence annotations
#' (features)} of a protein by it's UniProt accession.
#'
#' @section Corresponding API Resources:
#' "GET https://www.ebi.ac.uk/proteins/api/features/\{accession\}"
#'
#' @param accession \href{https://www.uniprot.org/help/accession_numbers}{
#' UniProtKB primary or secondary accession}.
#' @param types \href{https://www.uniprot.org/help/sequence_annotation}{
#' Sequence annotation (Features)} types. accepted values
#' are: "INIT_MET", "SIGNAL", "PROPEP", "TRANSIT", "CHAIN", "PEPTIDE",
#' "TOPO_DOM", "TRANSMEM", "DOMAIN", "REPEAT", "CA_BIND", "ZN_FING",
#' "DNA_BIND", "NP_BIND", "REGION", "COILED", "MOTIF", "COMPBIAS",
#' "ACT_SITE", "METAL", "BINDING", "SITE", "NON_STD", "MOD_RES", "LIPID",
#' "CARBOHYD", "DISULFID", "CROSSLNK", "VAR_SEQ", "VARIANT", "MUTAGEN",
#' "UNSURE", "CONFLICT", "NON_CONS", "NON_TER", "HELIX", "TURN", "STRAND"
#' and/or "INTRAMEM". You can supply up to 20 types.
#' @param categories \href{https://www.uniprot.org/help/sequence_annotation}{
#' Sequence annotation (Features)} categories (subsection). accepted values
#' are: "MOLECULE_PROCESSING", "TOPOLOGY", "SEQUENCE_INFORMATION",
#' "STRUCTURAL", "DOMAINS_AND_SITES", "PTM", "VARIANTS" and/or "MUTAGENESIS".
#' You can supply up to 8 categories.
#' @param location (character) Filter the features by the amino acid position in the sequence(s).
#' Provide the range as a character string with the format "begin-end", e.g. "35-70"
#' @param ... rbioapi option(s). See \code{\link{rba_options}}'s
#' arguments manual for more information on available options.
#'
#' @return A list in which you can find all of your given protein's sequence
#' annotations in a sub-list named "features".
#'
#' @references \itemize{
#' \item The UniProt Consortium , UniProt: the Universal Protein
#' Knowledgebase in 2025, Nucleic Acids Research, 2024;, gkae1010,
#' https://doi.org/10.1093/nar/gkae1010
#' \item Andrew Nightingale, Ricardo Antunes, Emanuele Alpi, Borisas
#' Bursteinas, Leonardo Gonzales, Wudong Liu, Jie Luo, Guoying Qi, Edd
#' Turner, Maria Martin, The Proteins API: accessing key integrated protein
#' and genome information, Nucleic Acids Research, Volume 45, Issue W1,
#' 3 July 2017, Pages W539–W544, https://doi.org/10.1093/nar/gkx237
#' \item \href{https://www.ebi.ac.uk/proteins/api/doc/}{Proteins API
#' Documentation}
#' \item \href{https://www.uniprot.org/help/publications}{Citations note
#' on UniProt website}
#' }
#'
#' @examples
#' \donttest{
#' rba_uniprot_features("Q99616")
#' }
#' \donttest{
#' rba_uniprot_features(accession = "Q99616", types = "DISULFID")
#' }
#'
#' @family "UniProt - Features"
#' @export
rba_uniprot_features <- function(accession,
types = NULL,
categories = NULL,
location = NULL,
...) {
## Load Global Options
.rba_ext_args(...)
## Check User-input Arguments
.rba_args(
cons = list(
list(arg = "accession", class = "character"),
list(
arg = "types", class = "character", len = 1,
val = c("INIT_MET",
"SIGNAL",
"PROPEP",
"TRANSIT",
"CHAIN",
"PEPTIDE",
"TOPO_DOM",
"TRANSMEM",
"DOMAIN",
"REPEAT",
"CA_BIND",
"ZN_FING",
"DNA_BIND",
"NP_BIND",
"REGION",
"COILED",
"MOTIF",
"COMPBIAS",
"ACT_SITE",
"METAL",
"BINDING",
"SITE",
"NON_STD",
"MOD_RES",
"LIPID",
"CARBOHYD",
"DISULFID",
"CROSSLNK",
"VAR_SEQ",
"VARIANT",
"MUTAGEN",
"UNSURE",
"CONFLICT",
"NON_CONS",
"NON_TER",
"HELIX",
"TURN",
"STRAND",
"INTRAMEM")
),
list(
arg = "categories", class = "character", max_len = 8,
val = c("MOLECULE_PROCESSING",
"TOPOLOGY",
"SEQUENCE_INFORMATION",
"STRUCTURAL",
"DOMAINS_AND_SITES",
"PTM",
"VARIANTS",
"MUTAGENESIS.")
),
list(arg = "location", class = "character", regex = "^\\d+\\-\\d+$", len = 1)
)
)
.msg(
"Retrieving sequence annotations (features) of protein %s.",
accession
)
## Build GET API Request's query
call_query <- .rba_query(
init = list("size" = "-1"),
list("categories", !is.null(categories), paste0(categories, collapse = ",")),
list("types", !is.null(types), paste0(types, collapse = ",")),
list("location", !is.null(location), location)
)
## Build Function-Specific Call
input_call <- .rba_httr(
httr = "get",
url = .rba_stg("uniprot", "url"),
path = paste0(.rba_stg("uniprot", "pth"), "features/", accession),
query = call_query,
accept = "application/json",
parser = "json->list",
save_to = .rba_file("uniprot_features.json")
)
## Call API
final_output <- .rba_skeleton(input_call)
return(final_output)
}
#### Variation Endpoints ####
#' Search UniProt Natural Variants
#'
#' Using this function, you can search and retrieve
#' \href{https://www.uniprot.org/help/variant}{Natural variant(s)} that
#' has been annotated in the protein's sequences. You may also refine your
#' search with modifiers such as source type, disease etc. See
#' "Arguments section" for more information.
#'
#' Note that this is a search function. Thus, you are not required to fill
#' every argument; You may use whatever combinations of arguments you see
#' fit for your query.
#'
#' @section Corresponding API Resources:
#' "GET https://www.ebi.ac.uk/proteins/api/variation"
#'
#' @param accession \href{https://www.uniprot.org/help/accession_numbers}{
#' UniProtKB primary or secondary accession}(s). You can supply up to 100
#' accession numbers.
#' @param source_type Variation's source type. You can choose up to two of:
#' "UniProt", "large scale study" and/or "mixed".
#' @param consequence_type Variation's consequence type. You can choose up to
#' two of: "missense", "stop gained" or "stop lost".
#' @param wild_type Wild type amino acid. Accepted values are IUPAC
#' single-letter amino acid (e.g. D for Aspartic acid) and "*" for stop
#' codon. You can supply up to 20 values.
#' @param alternative_sequence Alternative amino acid. Accepted values are IUPAC
#' single-letter amino acid (e.g. D for Aspartic acid) and "*" for stop codon
#' and "-" for deletion. You can supply up to 20 values.
#' @param location A valid amino acid range (e.g. 10-25) within the sequence
#' range where the variation occurs. You can supply up to 20 values.
#' @param disease \href{https://www.uniprot.org/diseases/}{Human disease}
#' that are associated with a sequence variation. Accepted values are
#' disease name (e.g. Alzheimer disease 18), partial disease name
#' (Alzheimer) and/or disease acronym (e.g. AD). You can supply up to
#' 20 values.
#' @param omim \href{https://www.ncbi.nlm.nih.gov/omim}{OMIM} ID that is
#' associated with a variation. You can supply up to 20 values.
#' @param evidence Pubmed ID of the variation's
#' \href{https://www.uniprot.org/citations/}{citation} You can supply up
#' to 20 values.
#' @param taxid NIH-NCBI \href{https://www.uniprot.org/taxonomy/}{Taxon ID}.
#' You can supply up to 20 taxon IDs.
#' @param db_type cross-reference database of the variation. You can supply
#' up to two of the following:\itemize{
#' \item "dbSNP": \href{https://www.ncbi.nlm.nih.gov/snp/}{NIH-NCBI dbSNP
#' database}.
#' \item "cosmic curate": \href{https://cancer.sanger.ac.uk/cosmic/}{COSMIC
#' (the Catalogue of Somatic Mutations in Cancer)}
#' \item "ClinVar": \href{https://www.ncbi.nlm.nih.gov/clinvar/}{NIH-NCBI
#' ClinVar}
#' }
#' @param db_id The variation ID in a Cross-reference (external) database.
#' You can supply up to 20 values.
#' @param save_peff Logical or Character:\itemize{
#' \item FALSE: (default) Do not save PEFF file, just return as a list object.
#' \item TRUE: Save as PEFF file to an automatically-generated path.
#' \item Character string: A valid file path to save the PEFF file.}
#' @param ... rbioapi option(s). See \code{\link{rba_options}}'s
#' arguments manual for more information on available options.
#'
#' @return List where each element corresponds to one UniProt entity returned
#' by your search query. The element itself is a sub-list containing all
#' information that UniProt has about that Variation.
#'
#' @references \itemize{
#' \item The UniProt Consortium , UniProt: the Universal Protein
#' Knowledgebase in 2025, Nucleic Acids Research, 2024;, gkae1010,
#' https://doi.org/10.1093/nar/gkae1010
#' \item Andrew Nightingale, Ricardo Antunes, Emanuele Alpi, Borisas
#' Bursteinas, Leonardo Gonzales, Wudong Liu, Jie Luo, Guoying Qi, Edd
#' Turner, Maria Martin, The Proteins API: accessing key integrated protein
#' and genome information, Nucleic Acids Research, Volume 45, Issue W1,
#' 3 July 2017, Pages W539–W544, https://doi.org/10.1093/nar/gkx237
#' \item \href{https://www.ebi.ac.uk/proteins/api/doc/}{Proteins API
#' Documentation}
#' \item \href{https://www.uniprot.org/help/publications}{Citations note
#' on UniProt website}
#' }
#'
#' @examples
#' \donttest{
#' rba_uniprot_variation_search(accession = "P05067")
#' }
#' \donttest{
#' rba_uniprot_variation_search(disease = "alzheimer disease, 18")
#' }
#' \donttest{
#' rba_uniprot_variation_search(disease = "alzheimer",
#' wild_type = "A", alternative_sequence = "T")
#' }
#'
#' @family "UniProt - Variation"
#' @export
rba_uniprot_variation_search <- function(accession = NULL,
source_type = NULL,
consequence_type = NULL,
wild_type = NULL,
alternative_sequence = NULL,
location = NULL,
disease = NULL,
omim = NULL,
evidence = NULL,
taxid = NULL,
db_type = NULL,
db_id = NULL,
save_peff = FALSE,
...) {
## Load Global Options
.rba_ext_args(..., ignore_save = TRUE)
## Check User-input Arguments
.rba_args(
cons = list(
list(arg = "accession", class = "character", max_len = 100),
list(
arg = "source_type", class = "character", max_len = 2,
val = c("uniprot", "large scale study", "mixed")
),
list(
arg = "consequence_type", class = "character",
val = c("missense", "stop gained", "stop lost")
),
list(arg = "wild_type", class = "character", max_len = 20),
list(arg = "alternative_sequence", class = "character", max_len = 20),
list(arg = "location", class = "character", regex = "^\\d+\\-\\d+$", len = 1),
list(arg = "disease", class = "character"),
list(arg = "omim", class = "character", max_len = 20),
list(arg = "evidence", class = "numeric", max_len = 20),
list(arg = "taxid", class = "numeric", max_len = 20),
list(arg = "db_type", class = "character", max_len = 2),
list(arg = "db_id", class = "character", max_len = 20),
list(arg = "save_peff", class = c("logical", "character"))
),
cond = list(
list(
quote(all(is.null(accession), is.null(disease),
is.null(omim), is.null(evidence),
is.null(taxid), is.null(db_type),
is.null(db_id))),
"You should supply at least one of: accession, disease, omim, evidence, taxid, db_type or db_id"
)
)
)
.msg(
"Searching UniProt and retrieving natural variations of proteins that match your supplied inputs."
)
## Build GET API Request's query
call_query <- .rba_query(
init = list("size" = "-1"),
list("accession", !is.null(accession), paste0(accession, collapse = ",")),
list("sourcetype", !is.null(source_type), paste0(source_type, collapse = ",")),
list("consequencetype", !is.null(consequence_type), paste0(consequence_type, collapse = ",")),
list("wildtype", !is.null(wild_type), paste0(wild_type, collapse = ",")),
list("alternativesequence", !is.null(alternative_sequence), paste0(alternative_sequence, collapse = ",")),
list("location", !is.null(location), location),
list("disease", !is.null(disease), disease),
list("omim", !is.null(omim), paste0(omim, collapse = ",")),
list("evidence", !is.null(evidence), paste0(evidence, collapse = ",")),
list("taxid", !is.null(taxid), paste0(taxid, collapse = ",")),
list("dbtype", !is.null(db_type), paste0(db_type, collapse = ",")),
list("dbid", !is.null(db_id), paste0(db_type, collapse = ","))
)
## Build Function-Specific Call
save_to <- ifelse(
isFALSE(save_peff),
yes = .rba_file(file = "uniprot_variation.json"),
no = .rba_file(file = "uniprot_variation.peff",
save_to = save_peff)
)
obj_parser_input <- list("json->list", .rba_uniprot_search_namer)
input_call <- .rba_httr(
httr = "get",
url = .rba_stg("uniprot", "url"),
path = paste0(.rba_stg("uniprot", "pth"), "variation"),
query = call_query,
save_to = save_to,
file_accept = "text/x-peff",
file_parser = "text->chr",
obj_accept = "application/json",
obj_parser = obj_parser_input
)
## Call API
final_output <- .rba_skeleton(input_call)
return(final_output)
}
#' Get natural variants in UniProt by NIH-NCBI SNP database identifier
#'
#' Retrieve natural variant annotations of a sequence using UniProt protein
#' accession, dbSNP or HGVS expression.
#'
#' @section Corresponding API Resources:
#' "GET https://www.ebi.ac.uk/proteins/api/variation/dbsnp/\{dbid\}"
#' \cr "GET https://www.ebi.ac.uk/proteins/api/variation/hgvs/\{hgvs\}"
#' \cr "GET https://www.ebi.ac.uk/proteins/api/variation/\{accession\}"
#'
#' @param id An ID which can be either a
#' \href{https://www.uniprot.org/help/accession_numbers}{UniProt primary or
#' secondary accession}, NIH-NCBI dbSNP ID or HGVS expression.
#' \href{https://www.ncbi.nlm.nih.gov/snp/}{NIH-NCBI dbSNP id} or
#' \href{https://varnomen.hgvs.org/}{HGVS Expression}.
#' @param id_type The type of supplied ID argument, one of:
#' \href{https://www.uniprot.org/help/accession_numbers}{"uniprot"},
#' \href{https://www.ncbi.nlm.nih.gov/snp/}{"dbsnp"} or
#' \href{https://varnomen.hgvs.org/}{"hgvs"}
#' @param source_type Variation's source type. You can choose up to two of:
#' "UniProt", "large scale study" and/or "mixed".
#' @param consequence_type Variation's consequence type. You can choose up to
#' two of: "missense", "stop gained" or "stop lost".
#' @param wild_type Wild type amino acid. Accepted values are IUPAC
#' single-letter amino acid (e.g. D for Aspartic acid) and "*" for stop
#' codon. You can supply up to 20 values.
#' @param alternative_sequence Alternative amino acid. Accepted values are IUPAC
#' single-letter amino acid (e.g. D for Aspartic acid) and "*" for stop codon
#' and "-" for deletion. You can supply up to 20 values.
#' @param location A valid amino acid range (e.g. 10-25) within the sequence
#' range where the variation occurs. You can supply up to 20 values.
#' @param save_peff Logical or Character:\itemize{
#' \item FALSE: (default) Do not save PEFF file, just return as a list object.
#' \item TRUE: Save as PEFF file to an automatically-generated path.
#' \item Character string: A valid file path to save the PEFF file.}
#' @param ... rbioapi option(s). See \code{\link{rba_options}}'s
#' arguments manual for more information on available options.
#'
#' @return A list where each element is a list that corresponds to a UniProt
#' protein entry.
#'
#' @references \itemize{
#' \item The UniProt Consortium , UniProt: the Universal Protein
#' Knowledgebase in 2025, Nucleic Acids Research, 2024;, gkae1010,
#' https://doi.org/10.1093/nar/gkae1010
#' \item Andrew Nightingale, Ricardo Antunes, Emanuele Alpi, Borisas
#' Bursteinas, Leonardo Gonzales, Wudong Liu, Jie Luo, Guoying Qi, Edd
#' Turner, Maria Martin, The Proteins API: accessing key integrated protein
#' and genome information, Nucleic Acids Research, Volume 45, Issue W1,
#' 3 July 2017, Pages W539–W544, https://doi.org/10.1093/nar/gkx237
#' \item \href{https://www.ebi.ac.uk/proteins/api/doc/}{Proteins API
#' Documentation}
#' \item \href{https://www.uniprot.org/help/publications}{Citations note
#' on UniProt website}
#' }
#'
#' @examples
#' \donttest{
#' rba_uniprot_variation(id = "rs121434451", id_type = "dbsnp")
#' }
#' \donttest{
#' rba_uniprot_variation(id = "NC_000008.11:g.22119227C>T", id_type = "hgvs")
#' }
#' \donttest{
#' rba_uniprot_variation(id = "O43593", id_type = "uniprot")
#' }
#'
#' @family "UniProt - Variation"
#' @export
rba_uniprot_variation <- function(id,
id_type,
source_type = NULL,
consequence_type = NULL,
wild_type = NULL,
alternative_sequence = NULL,
location = NULL,
save_peff = FALSE,
...) {
## Load Global Options
.rba_ext_args(..., ignore_save = TRUE)
## Check User-input Arguments
.rba_args(
cons = list(
list(arg = "id", class = "character"),
list(
arg = "id_type", class = "character",
val = c("uniprot", "dbsnp", "hgvs")
),
list(
arg = "source_type", class = "character", max_len = 2,
val = c("uniprot", "large scale study", "mixed")
),
list(
arg = "consequence_type", class = "character", max_len = 2,
val = c("missense", "stop gained", "stop lost")
),
list(arg = "wild_type", class = "character", max_len = 20),
list(arg = "alternative_sequence", class = "character", max_len = 20),
list(arg = "location", class = "character"),
list(arg = "save_peff", class = c("logical", "character"))
)
)
.msg(
"Retrieving Natural variant of %s.",
ifelse(
id_type == "uniprot",
yes = paste0("UniProt protein ", id),
no = paste0(id_type, " id ", id)
)
)
## Build GET API Request's query
call_query <- .rba_query(
init = list("size" = "-1"),
list("sourcetype", !is.null(source_type), paste0(source_type, collapse = ",")),
list("consequencetype", !is.null(consequence_type), paste0(consequence_type, collapse = ",")),
list("wildtype", !is.null(wild_type), paste0(wild_type, collapse = ",")),
list("alternativesequence", !is.null(alternative_sequence), paste0(alternative_sequence, collapse = ",")),
list("location", !is.null(location), location)
)
## Build Function-Specific Call
file_name <- sprintf(
"uniprot_variation_%s.%s",
id_type, ifelse(isFALSE(save_peff), "json", "peff")
)
save_to <- ifelse(
isFALSE(save_peff),
yes = .rba_file(file = file_name),
no = .rba_file(file = file_name, save_to = save_peff)
)
path_input <- switch(
id_type,
"uniprot" = paste0(.rba_stg("uniprot", "pth"), "variation/", id),
"hgvs" = paste0(.rba_stg("uniprot", "pth"), "variation/hgvs/", id),
"dbsnp" = paste0(.rba_stg("uniprot", "pth"), "variation/dbsnp/", id)
)
input_call <- .rba_httr(
httr = "get",
url = .rba_stg("uniprot", "url"),
path = path_input,
query = call_query,
save_to = save_to,
file_accept = "text/x-peff",
file_parser = "text->chr",
obj_accept = "application/json",
obj_parser = "json->list"
)
## Call API
final_output <- .rba_skeleton(input_call)
return(final_output)
}
#### Antigen Endpoints ####
#' Search Antigens in UniProt
#'
#' UniProt maps Antigenic (Antibody-binding) features from different sources
#' to the proteins' sequences. Using this function, you can search for
#' Antigenic sequences that has been map to UniProt proteins. You may also
#' refine your search with modifiers such as score etc. See
#' "Arguments section" for more information.
#'
#' Note that this is a search function. Thus, you are not required to fill
#' every argument; You may use whatever combinations of arguments you see
#' fit for your query.
#'
#' @section Corresponding API Resources:
#' "GET https://www.ebi.ac.uk/proteins/api/antigen"
#'
#' @param accession \href{https://www.uniprot.org/help/accession_numbers}{
#' UniProtKB primary or secondary accession}(s). You can supply up to 100
#' accession numbers.
#' @param antigen_sequence Protein sequence in the antigenic site.
#' @param antigen_id Human Protein Atlas (HPA) antigen ID. You can supply up to
#' 20 IDs.
#' @param ensembl_id Ensembl Stable Transcript ID. You can supply up to
#' 20 IDs.
#' @param match_score (Numeric) Minimum alignment score for the antigen
#' sequence and the target protein sequence.
#' @param ... rbioapi option(s). See \code{\link{rba_options}}'s
#' arguments manual for more information on available options.
#'
#' @return A list Where each element correspond to a UniProt protein (search
#' hit) and Antigenic features are organized under the "features" sub-list.
#'
#' @references \itemize{
#' \item The UniProt Consortium , UniProt: the Universal Protein
#' Knowledgebase in 2025, Nucleic Acids Research, 2024;, gkae1010,
#' https://doi.org/10.1093/nar/gkae1010
#' \item Andrew Nightingale, Ricardo Antunes, Emanuele Alpi, Borisas
#' Bursteinas, Leonardo Gonzales, Wudong Liu, Jie Luo, Guoying Qi, Edd
#' Turner, Maria Martin, The Proteins API: accessing key integrated protein
#' and genome information, Nucleic Acids Research, Volume 45, Issue W1,
#' 3 July 2017, Pages W539–W544, https://doi.org/10.1093/nar/gkx237
#' \item \href{https://www.ebi.ac.uk/proteins/api/doc/}{Proteins API
#' Documentation}
#' \item \href{https://www.uniprot.org/help/publications}{Citations note
#' on UniProt website}
#' }
#'
#' @examples
#' \donttest{
#' rba_uniprot_antigens_search(antigen_id = "HPA001060")
#' }
#'
#' @family "UniProt - Antigen"
#' @export
rba_uniprot_antigens_search <- function(accession = NULL,
antigen_sequence = NULL,
antigen_id = NULL,
ensembl_id = NULL,
match_score = NULL,
...) {
## Load Global Options
.rba_ext_args(...)
## Check User-input Arguments
.rba_args(
cons = list(
list(arg = "accession", class = "character", max_len = 100),
list(arg = "antigen_sequence", class = "character"),
list(arg = "antigen_id", class = "character", max_len = 20),
list(arg = "ensembl_id", class = "character", max_len = 20),
list(arg = "match_score", class = "numeric")
)
)
.msg(
"Searching UniProt and retrieving antigenic features of proteins that match your supplied inputs."
)
## Build GET API Request's query
call_query <- .rba_query(
init = list("size" = "-1"),
list("accession", !is.null(accession), paste0(accession, collapse = ",")),
list("antigen_sequence", !is.null(antigen_sequence), antigen_sequence),
list("antigen_id", !is.null(antigen_id), paste0(antigen_id, collapse = ",")),
list("ensembl_id", !is.null(ensembl_id), paste0(ensembl_id, collapse = ",")),
list("match_score", !is.null(match_score), match_score)
)
## Build Function-Specific Call
parser_input <- list("json->list", .rba_uniprot_search_namer)
input_call <- .rba_httr(
httr = "get",
url = .rba_stg("uniprot", "url"),
path = paste0(.rba_stg("uniprot", "pth"), "antigen"),
query = call_query,
accept = "application/json",
parser = parser_input,
save_to = .rba_file("uniprot_antigen_search.json")
)
## Call API
final_output <- .rba_skeleton(input_call)
return(final_output)
}
#' Get Antigens by UniProt Accession
#'
#' UniProt maps Antigenic features from different sources to the proteins'
#' sequences. Using this function, you can retrieve all the Antigenic
#' features that has been map to a given UniProt protein's sequence.
#'
#' @section Corresponding API Resources:
#' "GET https://www.ebi.ac.uk/proteins/api/antigen/\{accession\}"
#'
#' @param accession \href{https://www.uniprot.org/help/accession_numbers}{
#' UniProtKB primary or secondary accession}(s).
#' @param ... rbioapi option(s). See \code{\link{rba_options}}'s
#' arguments manual for more information on available options.
#'
#' @return A list containing the Antigenic features of your supplied
#' UniProt protein's sequence.
#'
#' @references \itemize{
#' \item The UniProt Consortium , UniProt: the Universal Protein
#' Knowledgebase in 2025, Nucleic Acids Research, 2024;, gkae1010,
#' https://doi.org/10.1093/nar/gkae1010
#' \item Andrew Nightingale, Ricardo Antunes, Emanuele Alpi, Borisas
#' Bursteinas, Leonardo Gonzales, Wudong Liu, Jie Luo, Guoying Qi, Edd
#' Turner, Maria Martin, The Proteins API: accessing key integrated protein
#' and genome information, Nucleic Acids Research, Volume 45, Issue W1,
#' 3 July 2017, Pages W539–W544, https://doi.org/10.1093/nar/gkx237
#' \item \href{https://www.ebi.ac.uk/proteins/api/doc/}{Proteins API
#' Documentation}
#' \item \href{https://www.uniprot.org/help/publications}{Citations note
#' on UniProt website}
#' }
#'
#' @examples
#' \donttest{
#' rba_uniprot_antigens("P04626")
#' }
#'
#' @family "UniProt - Antigen"
#' @export
rba_uniprot_antigens <- function(accession,
...) {
## Load Global Options
.rba_ext_args(...)
## Check User-input Arguments
.rba_args(
cons = list(
list(arg = "accession", class = "character", len = 1)
)
)
.msg(
"Retrieving Antigenic features mapped to the sequence of protein %s.",
accession
)
## Build Function-Specific Call
input_call <- .rba_httr(
httr = "get",
url = .rba_stg("uniprot", "url"),
path = paste0(.rba_stg("uniprot", "pth"), "antigen/", accession),
accept = "application/json",
parser = "json->list",
save_to = .rba_file("uniprot_antigen.json")
)
## Call API
final_output <- .rba_skeleton(input_call)
return(final_output)
}
#### Epitopes Endpoints ####
#' Search UniProt Epitopes
#'
#' Use this function to search epitope data associated to UniProt entities,
#' using various criteria such as UniProt accession, epitope sequence,
#' IEDB ID, and match score.
#'
#' @section Corresponding API Resources:
#' "GET https://www.ebi.ac.uk/proteins/api/epitope"
#'
#' @param accession \href{https://www.uniprot.org/help/accession_numbers}{
#' UniProtKB primary or secondary accession}(s). You can supply up to 100
#' accession numbers.
#' @param epitope_sequence (Character) Epitope's proteins sequence
#' @param iedb_id (Numeric) \href{https://www.iedb.org/}{EIDB} epitope
#' Identifier(s). You can supply up to 20 accession numbers.
#' @param match_score Integer: Minimum alignment score for the antigen sequence
#' and the target protein sequence.
#' @param ... rbioapi option(s). See \code{\link{rba_options}}'s
#' arguments manual for more information on available options.
#'
#' @return A List where each element corresponds to one UniProt entity returned
#' by your search query. The element itself is a sub-list containing all
#' information that UniProt has about that entity.
#'
#' @examples
#' \donttest{
#' rba_uniprot_epitope_search(accession = c("Q84ZX5", "P36222"))
#' }
#' \donttest{
#' rba_uniprot_epitope_search(epitope_sequence = "DKKCIEWEKAQHGA")
#' }
#' \donttest{
#' rba_uniprot_epitope_search(iedb_id = 20354)
#' }
#'
#' @family "UniProt - Epitopes"
#' @export
rba_uniprot_epitope_search <- function(accession = NULL,
epitope_sequence = NULL,
iedb_id = NULL,
match_score = NULL,
...) {
## Load Global Options
.rba_ext_args(...)
## Check User-input Arguments
.rba_args(
cons = list(
list(arg = "accession", class = "character", max_len = 100),
list(arg = "epitope_sequence", class = "character"),
list(arg = "iedb_id", class = "numeric", max_len = 20),
list(arg = "match_score", class = "numeric")
)
)
.msg(
"Searching UniProt for epitopes matching the supplied criteria."
)
## Build GET API Request's query
call_query <- .rba_query(
init = list("size" = "-1"),
list("accession", !is.null(accession), paste0(accession, collapse = ",")),
list("epitope_sequence", !is.null(epitope_sequence), epitope_sequence),
list("iedb_id", !is.null(iedb_id), paste0(iedb_id, collapse = ",")),
list("match_score", !is.null(match_score), match_score)
)
## Build Function-Specific Call
parser_input <- list("json->list", .rba_uniprot_search_namer)
input_call <- .rba_httr(
httr = "get",
url = .rba_stg("uniprot", "url"),
path = paste0(.rba_stg("uniprot", "pth"), "epitope"),
query = call_query,
accept = "application/json",
parser = parser_input,
save_to = .rba_file("uniprot_epitope_search.json")
)
## Call API
final_output <- .rba_skeleton(input_call)
return(final_output)
}
#' Retrieve Epitopes by Accession
#'
#' Use this function to retrieve epitope annotations linked to a UniProt entry.
#'
#' @section Corresponding API Resources:
#' "GET https://www.ebi.ac.uk/proteins/api/epitope/\{accession\}"
#'
#' @param accession \href{https://www.uniprot.org/help/accession_numbers}{
#' UniProtKB primary or secondary accession}.
#' @param ... rbioapi option(s). See \code{\link{rba_options}}'s
#' arguments manual for more information on available options.
#'
#' @return A list containing the UniProt epitope features details for the given
#' accession.
#'
#' @examples
#' \donttest{
#' rba_uniprot_epitope(accession = "P36222")
#' }
#'
#' @family "UniProt - Epitopes"
#' @export
rba_uniprot_epitope <- function(accession, ...) {
## Load Global Options
.rba_ext_args(...)
## Check User-input Arguments
.rba_args(
cons = list(
list(arg = "accession", class = "character", len = 1)
)
)
.msg(
"Retrieving epitope information for accession %s.",
accession
)
## Build Function-Specific Call
parser_input <- "json->list"
input_call <- .rba_httr(
httr = "get",
url = .rba_stg("uniprot", "url"),
path = paste0(.rba_stg("uniprot", "pth"), "epitope/", accession),
accept = "application/json",
parser = parser_input,
save_to = .rba_file("uniprot_epitope.json")
)
## Call API
final_output <- .rba_skeleton(input_call)
return(final_output)
}
#### Mutagenesis Endpoints ####
#' Search Mutagenesis in UniProt
#'
#' UniProt describes the effects of mutations in proteins' amino acid
#' sequence on the biological properties of the protein, cell or the
#' organism. Using this function, you can search for
#' \href{https://www.uniprot.org/help/mutagen}{
#' mutagenesis description} in UniProt proteins.
#' You may also refine your search. See "Arguments section" for more
#' information.
#'
#' Note that this is a search function. Thus, you are not required to fill
#' every argument; You may use whatever combinations of arguments you see
#' fit for your query.
#'
#' @section Corresponding API Resources:
#' "GET https://www.ebi.ac.uk/proteins/api/mutagenesis"
#'
#' @param accession \href{https://www.uniprot.org/help/accession_numbers}{
#' UniProtKB primary or secondary accession}(s). You can supply up to 100
#' accession numbers.
#' @param taxid NIH-NCBI \href{https://www.uniprot.org/taxonomy/}{Taxon ID}.
#' You can supply up to 20 taxon IDs.
#' @param db_id The ID in a Cross-reference (external) database.
#' You can supply up to 20 values.
#' @param ... rbioapi option(s). See \code{\link{rba_options}}'s
#' arguments manual for more information on available options.
#'
#' @return A list Where each element correspond to a UniProt protein (search
#' hit) and mutagenesis description are organized under the
#' "features" sub-list.
#'
#' @references \itemize{
#' \item The UniProt Consortium , UniProt: the Universal Protein
#' Knowledgebase in 2025, Nucleic Acids Research, 2024;, gkae1010,
#' https://doi.org/10.1093/nar/gkae1010
#' \item Andrew Nightingale, Ricardo Antunes, Emanuele Alpi, Borisas
#' Bursteinas, Leonardo Gonzales, Wudong Liu, Jie Luo, Guoying Qi, Edd
#' Turner, Maria Martin, The Proteins API: accessing key integrated protein
#' and genome information, Nucleic Acids Research, Volume 45, Issue W1,
#' 3 July 2017, Pages W539–W544, https://doi.org/10.1093/nar/gkx237
#' \item \href{https://www.ebi.ac.uk/proteins/api/doc/}{Proteins API
#' Documentation}
#' \item \href{https://www.uniprot.org/help/publications}{Citations note
#' on UniProt website}
#' }
#'
#' @examples
#' \donttest{
#' #search all mutations in COVID19 proteins
#' rba_uniprot_mutagenesis_search(taxid = 2697049)
#' }
#'
#' @family "UniProt - Mutagenesis"
#' @export
rba_uniprot_mutagenesis_search <- function(accession = NULL,
taxid = NULL,
db_id = NULL,
...) {
## Load Global Options
.rba_ext_args(...)
## Check User-input Arguments
.rba_args(
cons = list(
list(arg = "accession", class = "character", max_len = 100),
list(arg = "taxid", class = "numeric", max_len = 20),
list(arg = "db_id", class = "character", max_len = 20)
)
)
.msg(
"Searching UniProt and retrieving mutagenesis description of proteins that match your supplied inputs."
)
## Build GET API Request's query
call_query <- .rba_query(
init = list("size" = "-1"),
list("accession", !is.null(accession), paste0(accession, collapse = ",")),
list("taxid", !is.null(taxid), paste0(taxid, collapse = ",")),
list("db_id", !is.null(db_id), paste0(db_id, collapse = ","))
)
## Build Function-Specific Call
parser_input <- list("json->list", .rba_uniprot_search_namer)
input_call <- .rba_httr(
httr = "get",
url = .rba_stg("uniprot", "url"),
path = paste0(.rba_stg("uniprot", "pth"), "mutagenesis"),
query = call_query,
accept = "application/json",
parser = parser_input,
save_to = .rba_file("uniprot_mutagenesis_search.json")
)
## Call API
final_output <- .rba_skeleton(input_call)
return(final_output)
}
#' Get Mutagenesis by UniProt Accession
#'
#' UniProt describes the effects of mutations in proteins' amino acid
#' sequence on the biological properties of the protein, cell or the
#' organism. Using this function, you can get the
#' \href{https://www.uniprot.org/help/mutagen}{
#' Mutagenesis description} that has been mapped to a given UniProt protein.
#'
#' @section Corresponding API Resources:
#' "GET https://www.ebi.ac.uk/proteins/api/mutagenesis/\{accession\}"
#'
#' @param accession \href{https://www.uniprot.org/help/accession_numbers}{
#' UniProtKB primary or secondary accession}(s).
#' @param location A valid amino acid range (e.g. 10-25) within the sequence
#' range of the given proein.
#' @param ... rbioapi option(s). See \code{\link{rba_options}}'s
#' arguments manual for more information on available options.
#'
#' @return A list containing the mutagenesis description of your supplied
#' UniProt protein's sequence.
#'
#' @references \itemize{
#' \item The UniProt Consortium , UniProt: the Universal Protein
#' Knowledgebase in 2025, Nucleic Acids Research, 2024;, gkae1010,
#' https://doi.org/10.1093/nar/gkae1010
#' \item Andrew Nightingale, Ricardo Antunes, Emanuele Alpi, Borisas
#' Bursteinas, Leonardo Gonzales, Wudong Liu, Jie Luo, Guoying Qi, Edd
#' Turner, Maria Martin, The Proteins API: accessing key integrated protein
#' and genome information, Nucleic Acids Research, Volume 45, Issue W1,
#' 3 July 2017, Pages W539–W544, https://doi.org/10.1093/nar/gkx237
#' \item \href{https://www.ebi.ac.uk/proteins/api/doc/}{Proteins API
#' Documentation}
#' \item \href{https://www.uniprot.org/help/publications}{Citations note
#' on UniProt website}
#' }
#'
#' @examples
#' \donttest{
#' rba_uniprot_mutagenesis(accession = "P0DTC2", location = "300-400")
#' }
#'
#' @family "UniProt - Mutagenesis"
#' @export
rba_uniprot_mutagenesis <- function(accession,
location = NULL,
...) {
## Load Global Options
.rba_ext_args(...)
## Check User-input Arguments
.rba_args(
cons = list(
list(arg = "accession", class = "character", len = 1),
list(arg = "location", class = "character", regex = "^\\d+\\-\\d+$", len = 1)
)
)
.msg(
"Retrieving mutagenesis description mapped to the sequence of protein %s.",
accession
)
## Build GET API Request's query
call_query <- .rba_query(
init = list(),
list("location", !is.null(location), location)
)
## Build Function-Specific Call
input_call <- .rba_httr(
httr = "get",
url = .rba_stg("uniprot", "url"),
path = paste0(.rba_stg("uniprot", "pth"), "mutagenesis/", accession),
accept = "application/json",
parser = "json->list",
query = call_query,
save_to = .rba_file("uniprot_mutagenesis.json")
)
## Call API
final_output <- .rba_skeleton(input_call)
return(final_output)
}
#### RNA-editing ####
#' Search RNA Editing in UniProt
#'
#' UniProt Curates \href{https://www.uniprot.org/help/rna_editing}{RNA-editing
#' events} (conversion, insertion, deletion of nucleotides). Use this
#' function to search RNA editing records in UniProt using various
#' criteria such as accession, taxon ID, or variant location.
#'
#' @section Corresponding API Resources:
#' "GET https://www.ebi.ac.uk/proteins/api/rna-editing"
#'
#' @param accession \href{https://www.uniprot.org/help/accession_numbers}{
#' UniProtKB primary or secondary accession}(s). You can supply up to 100
#' accession numbers.
#' @param taxid (Numeric) NIH-NCBI \href{https://www.uniprot.org/taxonomy/}{Taxon ID}.
#' You can supply up to 20 taxon IDs.
#' @param variantlocation Character: RNA editing variant location(s).
#' You can supply up to 20 taxon IDs.
#' @param ... rbioapi option(s). See \code{\link{rba_options}}'s
#' arguments manual for more information on available options.
#'
#' @return A List where each element corresponds to one UniProt entity returned
#' by your search query. The element itself is a sub-list containing all
#' information that UniProt has about that entity.
#'
#' @examples
#' \donttest{
#' rba_uniprot_rna_edit_search(accession = c("Q16851", "Q16849"))
#' }
#'
#' @family "UniProt - RNA Editing"
#' @export
rba_uniprot_rna_edit_search <- function(accession = NULL,
taxid = NULL,
variantlocation = NULL,
...) {
## Load Global Options
.rba_ext_args(...)
## Check User-input Arguments
.rba_args(
cons = list(
list(arg = "accession", class = "character", max_len = 100),
list(arg = "taxid", class = "numeric", max_len = 20),
list(arg = "variantlocation", class = "character", max_len = 4)
)
)
.msg(
"Searching UniProt for RNA editing records matching the supplied criteria."
)
## Build GET API Request's query
call_query <- .rba_query(
init = list("size" = "-1"),
list("accession", !is.null(accession), paste0(accession, collapse = ",")),
list("taxid", !is.null(taxid), paste0(taxid, collapse = ",")),
list("variantlocation", !is.null(variantlocation), paste0(variantlocation, collapse = ","))
)
## Build Function-Specific Call
parser_input <- list("json->list", .rba_uniprot_search_namer)
input_call <- .rba_httr(
httr = "get",
url = .rba_stg("uniprot", "url"),
path = paste0(.rba_stg("uniprot", "pth"), "rna-editing"),
query = call_query,
accept = "application/json",
parser = parser_input,
save_to = .rba_file("uniprot_rna_editing_search.json")
)
## Call API
final_output <- .rba_skeleton(input_call)
return(final_output)
}
#' Retrieve Epitope by Accession
#'
#' Use this function to retrieve
#' \href{https://www.uniprot.org/help/rna_editing}{RNA-editing
#' events} (conversion, insertion, deletion of nucleotides) annotations
#' linked to a UniProt entry.
#'
#' @section Corresponding API Resources:
#' "GET https://www.ebi.ac.uk/proteins/api/rna-edit/\{accession\}"
#'
#' @param accession \href{https://www.uniprot.org/help/accession_numbers}{
#' UniProtKB primary or secondary accession}.
#' @param ... rbioapi option(s). See \code{\link{rba_options}}'s
#' arguments manual for more information on available options.
#'
#' @return A list containing the UniProt RNA-editing features details for the
#' given accession.
#'
#' @examples
#' \donttest{
#' rba_uniprot_rna_edit(accession = "Q16851")
#' }
#'
#' @family "UniProt - Epitopes"
#' @export
rba_uniprot_rna_edit <- function(accession, ...) {
## Load Global Options
.rba_ext_args(...)
## Check User-input Arguments
.rba_args(
cons = list(
list(arg = "accession", class = "character", len = 1)
)
)
.msg(
"Retrieving RNA-editing information for accession %s.",
accession
)
## Build Function-Specific Call
input_call <- .rba_httr(
httr = "get",
url = .rba_stg("uniprot", "url"),
path = paste0(.rba_stg("uniprot", "pth"), "rna-editing/", accession),
accept = "application/json",
parser = "json->list",
save_to = .rba_file("uniprot_rna_edit.json")
)
## Call API
final_output <- .rba_skeleton(input_call)
return(final_output)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.