#' Get uniprot info from organism
#'
#' Queries the EBI Proteins API for the proteome entries matching
#' \code{organism}, reduces them to a single entry and stores that entry as a
#' tab-separated file below \code{path}. When the file already exists and
#' \code{update = FALSE}, the stored file is read instead of contacting the API.
#'
#' @param organism character, name of organism
#' @param path path at which the info file shall be stored locally.
#' @param update shall the internal \code{\link{cachedir}} file be deleted and the info
#' file freshly downloaded from the UniProt API?
#' @return A tibble describing the selected proteome entry, or \code{NULL}
#' (after emitting a message) when the download or its processing failed.
#' @export
getUniProtInfo <- function(organism, path = cachedir(), update = TRUE) {
  organism_new <- stringr::str_replace_all(organism, " ", "%20")
  organism_name_path <- stringr::str_replace_all(organism, " ", "_")
  # Build the cache file below the user supplied `path` (which defaults to
  # cachedir()), so that a custom location is actually honoured.
  file <- file.path(path, paste0(organism_name_path, "_uniprot_info.tsv"))
  ebi_url <- "https://www.ebi.ac.uk/"
  ebi_url_proteome <- paste0(ebi_url, "proteins/api/proteomes")
  # Single definition of the request URL, shared by the download and the
  # failure message.
  query_url <- paste0(ebi_url_proteome, "?offset=0&size=-1&name=", organism_new)

  # Cache hit: reuse the stored file unless a refresh was requested.
  if (file.exists(file) && !update) {
    return(suppressWarnings(read_uniprot_info(file)))
  }

  # Make sure the target directory exists before the helper writes into it.
  if (!dir.exists(path)) {
    dir.create(path, recursive = TRUE, showWarnings = FALSE)
  }

  tryCatch({
    uniprot_species_info <- tibble::as_tibble(jsonlite::fromJSON(query_url))
    write_uniprot_info(file, uniprot_species_info, organism)
  }, error = function(e) {
    message(
      "Something went wrong when trying to access the API '", query_url, "'",
      " Sometimes the internet connection isn't stable and re-running the function might help. Is it possible to access the homepage '",
      ebi_url, "' through your browser?",
      " Original error: ", conditionMessage(e)
    )
    # Return an explicit value; without it the function would fail afterwards
    # with "object 'uniprot_species_info' not found".
    NULL
  })
}
# Read a cached UniProt info file (tab-separated, with header row).
#
# file: path to the TSV file to read.
#
# Returns the tibble produced by readr::read_tsv(). Columns that are not
# listed in the specification below have their type guessed by readr.
read_uniprot_info <- function(file) {
  readr::read_tsv(
    file,
    col_names = TRUE,
    col_types = readr::cols(
      name = readr::col_character(),
      # UniProt proteome identifiers look like "UP000005640"; parsing them as
      # integers turns every value into NA.
      upid = readr::col_character(),
      taxonomy = readr::col_integer(),
      isReferenceProteome = readr::col_logical(),
      isRepresentativeProteome = readr::col_logical()
    )
  )
}
# Reduce the proteome table to a single entry for `organism` and store it.
#
# file:                 path of the TSV file to write.
# uniprot_species_info: data frame of proteome entries; the code reads the
#                       columns name, upid, taxonomy, isReferenceProteome,
#                       isRepresentativeProteome, superregnum and dbReference.
# organism:             organism name; rows whose `name` contains this text
#                       literally are kept.
#
# Returns the selected row (six columns) after writing it to `file`. When no
# row matches, a warning is raised, nothing is written and the empty table is
# returned.
write_uniprot_info <- function(file, uniprot_species_info, organism) {
  # Local NULL bindings for the column names used unquoted below (presumably
  # to keep R CMD check from reporting them as undefined globals).
  name <- upid <- taxonomy <- isReferenceProteome <-
    isRepresentativeProteome <- superregnum <- NULL

  # Match the organism name literally: names may contain regex
  # metacharacters such as parentheses or dots.
  uniprot_species_info <- dplyr::filter(
    uniprot_species_info,
    stringr::str_detect(name, stringr::fixed(organism))
  )

  if (nrow(uniprot_species_info) > 1) {
    # Several candidates: prefer reference proteomes when there are any.
    # `%in% TRUE` maps NA flags to FALSE, so neither the condition nor the
    # row subset can be poisoned by missing values.
    is_reference <- uniprot_species_info$isReferenceProteome %in% TRUE
    if (any(is_reference)) {
      uniprot_species_info <- uniprot_species_info[is_reference, ]
    }
    # Among the remaining candidates keep the one with the most dbReference
    # rows. NROW() yields 0 for NULL entries, keeping the result an integer
    # vector that which.max() can handle.
    n_references <- vapply(uniprot_species_info$dbReference, NROW, integer(1))
    most_expansive_assembly <- which.max(n_references)
    uniprot_species_info <- uniprot_species_info[most_expansive_assembly, ]
  }

  if (nrow(uniprot_species_info) == 0) {
    warning("Could not find a valid reference assembly id for uniprot for this species!")
    return(uniprot_species_info)
  }

  uniprot_species_info <- dplyr::select(
    uniprot_species_info,
    name,
    upid,
    taxonomy,
    isReferenceProteome,
    isRepresentativeProteome,
    superregnum
  )
  readr::write_tsv(uniprot_species_info, file)
  return(uniprot_species_info)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.