Nothing
## ----setup, echo=FALSE, warning=FALSE-----------------------------------------
# Global knitr chunk defaults for this document. Every option is supplied
# exactly once so there is a single, unambiguous place to change each value.
knitr::opts_chunk$set(
  echo = TRUE,
  eval = TRUE,
  warning = FALSE,
  message = FALSE,
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5
)
# Attach the two packages used throughout, hiding their startup banners.
suppressPackageStartupMessages({
  library(rPDBapi)
  library(dplyr)
})
# Optional dependencies are only probed, not attached; the r3dmol chunk is
# evaluated only when both flags are TRUE.
have_r3dmol <- requireNamespace("r3dmol", quietly = TRUE)
have_shiny <- requireNamespace("shiny", quietly = TRUE)
# Default entry ID; the entry-detail chunk later reassigns it from the search
# results.
selected_entry <- "4HHB"
# Evaluate `expr` in the caller's frame while muffling anything it emits via
# message(). The value of `expr` is returned unchanged.
quietly <- function(expr) {
  captured <- substitute(expr)
  suppressMessages(eval.parent(captured))
}
## ----installation, eval = FALSE-----------------------------------------------
# install.packages("rPDBapi")
#
# # Development version
# remotes::install_github("selcukorkmaz/rPDBapi")
## ----libraries----------------------------------------------------------------
suppressPackageStartupMessages(library(rPDBapi))
suppressPackageStartupMessages(library(dplyr))
## ----concepts-----------------------------------------------------------------
# Three independent filters: a full-text term, an exact match on the
# experimental method, and a resolution range from 0 to 2.5.
kinase_full_text <- DefaultOperator("protein kinase")
xray_method <- ExactMatchOperator(
  attribute = "exptl.method",
  value = "X-RAY DIFFRACTION"
)
high_resolution <- RangeOperator(
  attribute = "rcsb_entry_info.resolution_combined",
  from_value = 0,
  to_value = 2.5
)
# Combine the filters with AND; the list order is text, method, resolution.
kinase_query <- QueryGroup(
  queries = list(kinase_full_text, xray_method, high_resolution),
  logical_operator = "AND"
)
kinase_query
## ----request-options----------------------------------------------------------
# Start at index 0, ask for 10 results, sort by "score" with desc = TRUE.
search_controls <- RequestOptions(
  result_start_index = 0,
  num_results = 10,
  sort_by = "score",
  desc = TRUE
)
search_controls
## ----identifier-helpers-------------------------------------------------------
# One sample per identifier style, shown next to what infer_id_type() reports.
example_ids <- c("4HHB", "4HHB-1", "4HHB_1", "4HHB.A", "ATP")
dplyr::tibble(id = example_ids, inferred_type = infer_id_type(example_ids))
# Parse one identifier, then build identifiers at each level. The entry ID is
# passed with surrounding spaces on purpose.
parse_rcsb_id("4HHB-1")
build_entry_id(" 4HHB ")
build_assembly_id("4HHB", 1)
build_entity_id("4HHB", 1)
build_instance_id("4HHB", "A")
## ----simple-search, eval = TRUE-----------------------------------------------
# Convenience full-text search; inspect the first hits, the class and the
# "return_type" attribute of the result.
kinase_hits <- query_search("protein kinase")
head(kinase_hits, n = 10)
class(kinase_hits)
attr(kinase_hits, "return_type")
## ----advanced-search, eval = TRUE---------------------------------------------
# Operator-based search using the composed query and the paging/sorting
# controls defined earlier.
kinase_entry_ids <- perform_search(
  search_operator = kinase_query,
  return_type = "ENTRY",
  request_options = search_controls,
  verbosity = FALSE
)
kinase_entry_ids
class(kinase_entry_ids)
## ----entry-properties---------------------------------------------------------
# Property specification reused by the entry-level fetches below: each name is
# a top-level field and each value lists the sub-fields requested from it.
# `rcsb_id` carries an empty list.
entry_properties <- list(
  rcsb_id = list(),
  struct = "title",
  struct_keywords = "pdbx_keywords",
  exptl = "method",
  rcsb_entry_info = c("molecular_weight", "resolution_combined"),
  rcsb_accession_info = "initial_release_date"
)
entry_properties
## ----schema-aware-properties--------------------------------------------------
# Browse the field registry: the first ten ENTRY fields, then the fields that
# match "resolution".
head(list_rcsb_fields("ENTRY"), n = 10)
search_rcsb_fields("resolution", data_type = "ENTRY")
# Strict validation of the property list used for entry retrieval.
validate_properties(
  properties = entry_properties,
  data_type = "ENTRY",
  strict = TRUE
)
# Non-strict validation of a list that deliberately includes
# "unknown_subfield".
validate_properties(
  properties = list(
    rcsb_entry_info = c("resolution_combined", "unknown_subfield")
  ),
  data_type = "ENTRY",
  strict = FALSE
)
## ----strict-validation-pattern, eval = TRUE-----------------------------------
# Turn on strict property validation for exactly one query-building call.
#
# on.exit() is deliberately not used here: this code runs at the top level of
# a script/chunk, where there is no enclosing function whose exit would fire
# the handler at the right moment, so the option would not reliably be active
# during (and restored after) the call. tryCatch(finally = ) restores the
# previous value as soon as the call finishes, whether or not it raises.
old_opt <- options(rPDBapi.strict_property_validation = TRUE)
tryCatch(
  generate_json_query(
    ids = c("4HHB"),
    data_type = "ENTRY",
    properties = list(rcsb_entry_info = c("resolution_combined"))
  ),
  finally = options(old_opt)
)
## ----entry-metadata, eval = TRUE----------------------------------------------
# One-step fetch: selected properties for the first five search hits, returned
# as a data frame.
kinase_metadata <- data_fetcher(
  id = kinase_entry_ids[1:5],
  data_type = "ENTRY",
  properties = entry_properties,
  return_as_dataframe = TRUE
)
kinase_metadata
## ----raw-query, eval = TRUE---------------------------------------------------
# The same retrieval split into explicit steps, here for the first three hits.
# Step 1: build the query text and print it.
kinase_json_query <- generate_json_query(
  ids = kinase_entry_ids[1:3],
  data_type = "ENTRY",
  properties = entry_properties
)
cat(kinase_json_query)
## ----raw-response, eval = TRUE------------------------------------------------
# Step 2: send the query and look at the top two levels of the response.
kinase_raw <- fetch_data(
  json_query = kinase_json_query,
  data_type = "ENTRY",
  ids = kinase_entry_ids[1:3]
)
str(kinase_raw, max.level = 2)
## ----tidy-conversion, eval = TRUE---------------------------------------------
# Step 3: convert the raw response into a data frame.
kinase_tidy <- return_data_as_dataframe(
  response = kinase_raw,
  data_type = "ENTRY",
  ids = kinase_entry_ids[1:3]
)
kinase_tidy
## ----batch-fetch, eval = TRUE-------------------------------------------------
# Cache location under the session's temporary directory.
cache_dir <- file.path(tempdir(), "rpdbapi-vignette-cache")
# Batched retrieval of the first five hits: batches of 2, up to 2 retry
# attempts with a backoff of 0, caching enabled, progress and verbosity off.
kinase_batch <- data_fetcher_batch(
  id = kinase_entry_ids[1:5],
  data_type = "ENTRY",
  properties = entry_properties,
  return_as_dataframe = TRUE,
  batch_size = 2,
  retry_attempts = 2,
  retry_backoff = 0,
  cache = TRUE,
  cache_dir = cache_dir,
  progress = FALSE,
  verbosity = FALSE
)
kinase_batch
# The "provenance" attribute of the result and the state of the cache directory.
attr(kinase_batch, "provenance")
cache_info(cache_dir)
## ----clear-cache, eval = TRUE-------------------------------------------------
# Clear that cache directory and inspect it again.
clear_rpdbapi_cache(cache_dir)
cache_info(cache_dir)
## ----batch-strategy, eval = TRUE----------------------------------------------
# Use data_fetcher() when:
# - the ID set is small
# - you want the simplest request path
# - retry, cache, and provenance are unnecessary
# Use data_fetcher_batch() when:
# - the ID set is large
# - requests may need retries
# - repeated retrieval should reuse cached results
# - you want an explicit provenance record
## ----provenance-interpretation, eval = TRUE-----------------------------------
# Two-column overview (field, value) of the "provenance" attribute attached to
# the batch result.
#
# vapply() requires the callback to return exactly one string per field.
# List-valued fields are shown as the placeholder "<list>"; length-one fields
# are converted as-is; fields of any other length (including NULL / empty) are
# collapsed into one comma-separated string so vapply() never fails on them.
provenance_tbl <- dplyr::tibble(
  field = names(attr(kinase_batch, "provenance")),
  value = vapply(
    attr(kinase_batch, "provenance"),
    function(x) {
      if (is.list(x)) {
        "<list>"
      } else if (length(x) == 1L) {
        as.character(x)
      } else {
        paste(as.character(x), collapse = ", ")
      }
    },
    character(1)
  )
)
provenance_tbl
## ----assembly-search, eval = TRUE---------------------------------------------
# Same composed query as before, but asking for ASSEMBLY identifiers and only
# the first five results.
kinase_assembly_ids <- perform_search(
  search_operator = kinase_query,
  return_type = "ASSEMBLY",
  request_options = RequestOptions(result_start_index = 0, num_results = 5),
  verbosity = FALSE
)
kinase_assembly_ids
## ----assembly-metadata, eval = TRUE-------------------------------------------
# Assembly-level properties to request for those identifiers.
assembly_properties <- list(
  rcsb_id = list(),
  pdbx_struct_assembly = c("details", "method_details", "oligomeric_count"),
  rcsb_struct_symmetry = c("kind", "symbol")
)
kinase_assemblies <- data_fetcher(
  id = kinase_assembly_ids,
  data_type = "ASSEMBLY",
  properties = assembly_properties,
  return_as_dataframe = TRUE
)
kinase_assemblies
## ----assembly-objects, eval = TRUE--------------------------------------------
# Wrap the data frame in an assembly object, then view it as a tibble and
# summarize it.
assembly_object <- as_rpdb_assembly(
  kinase_assemblies,
  metadata = list(query = "protein kinase assemblies")
)
assembly_object
dplyr::as_tibble(assembly_object)
summarize_assemblies(assembly_object)
## ----identifier-aware-retrieval-----------------------------------------------
# Show each sample identifier next to its inferred type, then parse an
# instance-style identifier.
dplyr::tibble(
  example_id = c("4HHB", "4HHB-1", "4HHB_1", "4HHB.A", "ATP"),
  inferred_type = infer_id_type(c("4HHB", "4HHB-1", "4HHB_1", "4HHB.A", "ATP"))
)
parse_rcsb_id("4HHB.A")
## ----identifier-aware-fetch, eval = TRUE--------------------------------------
# Each fetch pairs an identifier builder with the matching data_type and
# requests only rcsb_id.
# Entry-level retrieval
data_fetcher(
  id = build_entry_id("4HHB"),
  data_type = "ENTRY",
  properties = list(rcsb_id = list())
)
# Assembly-level retrieval
data_fetcher(
  id = build_assembly_id("4HHB", 1),
  data_type = "ASSEMBLY",
  properties = list(rcsb_id = list())
)
# Polymer-entity retrieval
data_fetcher(
  id = build_entity_id("4HHB", 1),
  data_type = "POLYMER_ENTITY",
  properties = list(rcsb_id = list())
)
## ----polymer-search, eval = TRUE----------------------------------------------
# Composed query again, now returning POLYMER_ENTITY identifiers (first five).
kinase_polymer_ids <- perform_search(
  search_operator = kinase_query,
  return_type = "POLYMER_ENTITY",
  request_options = RequestOptions(result_start_index = 0, num_results = 5),
  verbosity = FALSE
)
kinase_polymer_ids
## ----polymer-metadata, eval = TRUE--------------------------------------------
# Source-organism and cluster-membership fields for those polymer entities.
polymer_properties <- list(
  rcsb_id = list(),
  rcsb_entity_source_organism = c("ncbi_taxonomy_id", "ncbi_scientific_name"),
  rcsb_cluster_membership = c("cluster_id", "identity")
)
kinase_polymer_metadata <- data_fetcher(
  id = kinase_polymer_ids,
  data_type = "POLYMER_ENTITY",
  properties = polymer_properties,
  return_as_dataframe = TRUE
)
kinase_polymer_metadata
## ----taxonomy-extraction, eval = TRUE-----------------------------------------
# Wrap the metadata, extract the taxonomy table, and count rows per scientific
# name (most frequent first).
polymer_object <- as_rpdb_polymer_entity(
  kinase_polymer_metadata,
  metadata = list(query = "kinase polymer entities")
)
taxonomy_table <- extract_taxonomy_table(polymer_object)
taxonomy_table
count(taxonomy_table, ncbi_scientific_name, sort = TRUE)
## ----entry-detail, eval = TRUE------------------------------------------------
# Use the first search hit as the entry examined in the following chunks.
selected_entry <- kinase_entry_ids[[1]]
selected_info <- quietly(get_info(selected_entry))
# One-row summary. pluck() walks the nested result and falls back to NA when a
# path is absent; the resolution values are joined with "; ".
entry_summary <- dplyr::tibble(
  rcsb_id = selected_entry,
  title = purrr::pluck(
    selected_info, "struct", "title",
    .default = NA_character_
  ),
  keywords = purrr::pluck(
    selected_info, "struct_keywords", "pdbx_keywords",
    .default = NA_character_
  ),
  method = purrr::pluck(
    selected_info, "exptl", 1, "method",
    .default = NA_character_
  ),
  citation_title = purrr::pluck(
    selected_info, "rcsb_primary_citation", "title",
    .default = NA_character_
  ),
  resolution = paste(
    purrr::pluck(
      selected_info, "rcsb_entry_info", "resolution_combined",
      .default = NA
    ),
    collapse = "; "
  )
)
entry_summary
## ----literature-links, eval = TRUE, warning=FALSE-----------------------------
# Fall back to "4HHB" unless `selected_entry` is a usable single ID. Besides
# "missing" and "empty string", the guard also covers zero-length or
# multi-element values (which would make the scalar `||` condition fail) and
# NA (for which nzchar() returns TRUE, so it would otherwise slip through).
if (
  !exists("selected_entry", inherits = TRUE) ||
    length(selected_entry) != 1L ||
    is.na(selected_entry) ||
    !nzchar(selected_entry)
) {
  selected_entry <- "4HHB"
}
literature_term <- selected_entry
# Look up papers (at most 3) and the struct_keywords field for that term,
# muffling messages from both helpers.
kinase_papers <- quietly(find_papers(literature_term, max_results = 3))
kinase_keywords <- quietly(find_results(literature_term, field = "struct_keywords"))
kinase_papers
head(kinase_keywords, 3)
## ----coordinates, eval = TRUE-------------------------------------------------
# Retrieve the structure for the selected entry in "cif" format.
kinase_structure <- get_pdb_file(
  pdb_id = selected_entry,
  filetype = "cif",
  verbosity = FALSE
)
# Reshape the xyz values into three columns, filling row by row, and name the
# columns x, y, z.
# NOTE(review): assumes xyz is a flat x, y, z, x, y, z, ... sequence with one
# triple per atom row - confirm against get_pdb_file().
coordinate_matrix <- matrix(kinase_structure$xyz, ncol = 3, byrow = TRUE)
coordinate_df <- data.frame(
  x = coordinate_matrix[, 1],
  y = coordinate_matrix[, 2],
  z = coordinate_matrix[, 3]
)
# Keep the rows selected by the `calpha` index and place chain/residue columns
# next to their coordinates.
calpha_atoms <- cbind(
  kinase_structure$atom[kinase_structure$calpha, c("chain", "resno", "resid")],
  coordinate_df[kinase_structure$calpha, , drop = FALSE]
)
head(calpha_atoms, n = 10)
## ----calpha-helper, eval = TRUE-----------------------------------------------
# Package helper applied to the same structure; it replaces the manual table.
calpha_atoms <- extract_calpha_coordinates(kinase_structure)
head(calpha_atoms, n = 10)
## ----fasta, eval = TRUE-------------------------------------------------------
# Sequences for the entry: how many there are and the lengths of the first few.
kinase_sequences <- get_fasta_from_rcsb_entry(selected_entry, verbosity = FALSE)
length(kinase_sequences)
utils::head(nchar(unlist(kinase_sequences)))
## ----structure-sequence-join, eval = TRUE-------------------------------------
# Combine the structure and its sequences into one summary.
chain_sequence_summary <- join_structure_sequence(kinase_structure, kinase_sequences)
chain_sequence_summary
## ----object-model-local-------------------------------------------------------
# Entry object built from an in-memory data frame; no fetch is involved here.
entry_demo <- as_rpdb_entry(
  data.frame(
    rcsb_id = c("4HHB", "1CRN"),
    method = c("X-RAY DIFFRACTION", "SOLUTION NMR"),
    resolution_combined = c("1.74", NA),
    stringsAsFactors = FALSE
  ),
  metadata = list(example = "local object demo")
)
entry_demo
dplyr::as_tibble(entry_demo)
summarize_entries(entry_demo)
entry_demo$metadata
## ----structure-object-local---------------------------------------------------
# Structure object built from a hand-written list: two atom rows, six
# coordinate values, and a logical `calpha` mask flagging the first atom.
structure_demo <- as_rpdb_structure(
  list(
    atom = data.frame(
      chain = c("A", "A"),
      resno = c(1L, 2L),
      resid = c("GLY", "ALA"),
      stringsAsFactors = FALSE
    ),
    xyz = c(1, 2, 3, 4, 5, 6),
    calpha = c(TRUE, FALSE)
  ),
  metadata = list(source = "illustration")
)
structure_demo
dplyr::as_tibble(structure_demo)
## ----downstream-analysis, eval = TRUE-----------------------------------------
# Wrap the fetched entry metadata and summarize it.
entry_object <- as_rpdb_entry(
  kinase_metadata,
  metadata = list(query = "protein kinase entry metadata")
)
summarize_entries(entry_object)
# Convert three columns to numeric/Date, sort by resolution, and keep a fixed
# set of columns.
kinase_summary <- dplyr::as_tibble(entry_object) %>%
  mutate(
    molecular_weight = as.numeric(molecular_weight),
    resolution_combined = as.numeric(resolution_combined),
    initial_release_date = as.Date(initial_release_date)
  ) %>%
  arrange(resolution_combined) %>%
  select(
    rcsb_id,
    title,
    pdbx_keywords,
    method,
    molecular_weight,
    resolution_combined,
    initial_release_date
  )
kinase_summary
# Aggregate figures over the summary table; NA values are ignored.
summarise(
  kinase_summary,
  n_structures = n(),
  median_molecular_weight = median(molecular_weight, na.rm = TRUE),
  best_resolution = min(resolution_combined, na.rm = TRUE)
)
## ----taxonomy-summary, eval = TRUE--------------------------------------------
# Rows per scientific name in the polymer metadata, most frequent first.
count(kinase_polymer_metadata, ncbi_scientific_name, sort = TRUE)
## ----r3dmol-view, eval = have_r3dmol && have_shiny----------------------------
# Reach r3dmol through its namespace object so the package is never attached.
r3d <- asNamespace("r3dmol")
visualization_entry <- "4HHB"
# Save the structure as a "pdb" file under the session's temporary directory.
saved_structure <- quietly(
  get_pdb_file(
    pdb_id = visualization_entry,
    filetype = "pdb",
    save = TRUE,
    path = tempdir(),
    verbosity = FALSE
  )
)
# Viewer pipeline: add the saved model, apply a cartoon style coloured by
# "spectrum", then zoom to it.
r3d$r3dmol() %>%
  r3d$m_add_model(data = saved_structure$path, format = "pdb") %>%
  r3d$m_set_style(style = r3d$m_style_cartoon(color = "spectrum")) %>%
  r3d$m_zoom_to()
## ----sequence-operator--------------------------------------------------------
# Sequence-similarity operator for a protein fragment, with the E-value and
# identity cutoffs set explicitly.
kinase_motif_sequence <- "VAIKTLKPGTMSPEAFLQEAQVMKKLRHEKLVQLYAVV"
sequence_operator <- SequenceOperator(
  sequence = kinase_motif_sequence,
  sequence_type = "PROTEIN",
  evalue_cutoff = 10,
  identity_cutoff = 0.7
)
sequence_operator
# Automatic sequence-type detection on two short nucleotide strings that
# differ only in T versus U.
autoresolve_sequence_type("ATGCGTACGTAGC")
autoresolve_sequence_type("AUGCGUACGUAGC")
## ----sequence-search, eval = TRUE---------------------------------------------
# Run the sequence operator, returning up to five polymer entities.
sequence_hits <- perform_search(
  search_operator = sequence_operator,
  return_type = "POLYMER_ENTITY",
  request_options = RequestOptions(result_start_index = 0, num_results = 5),
  verbosity = FALSE
)
sequence_hits
## ----seqmotif-operator--------------------------------------------------------
# Motif operator given a regular-expression pattern over protein sequences.
prosite_like_motif <- SeqMotifOperator(
  pattern = "[LIV][ACDEFGHIKLMNPQRSTVWY]K[GST]",
  sequence_type = "PROTEIN",
  pattern_type = "REGEX"
)
prosite_like_motif
## ----seqmotif-search, eval = TRUE---------------------------------------------
# Run the motif operator, returning up to five polymer entities.
motif_hits <- perform_search(
  search_operator = prosite_like_motif,
  return_type = "POLYMER_ENTITY",
  request_options = RequestOptions(result_start_index = 0, num_results = 5),
  verbosity = FALSE
)
motif_hits
## ----structure-operator-------------------------------------------------------
# Structure operator for assembly 1 of entry 4HHB in "RELAXED_SHAPE_MATCH"
# mode, plus the search service inferred for it.
structure_operator <- StructureOperator(
  pdb_entry_id = "4HHB",
  assembly_id = 1,
  search_mode = "RELAXED_SHAPE_MATCH"
)
structure_operator
infer_search_service(structure_operator)
## ----structure-search, eval = TRUE--------------------------------------------
# The operator is wrapped in QueryNode() before being searched; up to five
# assemblies are returned.
structure_hits <- perform_search(
  search_operator = QueryNode(structure_operator),
  return_type = "ASSEMBLY",
  request_options = RequestOptions(result_start_index = 0, num_results = 5),
  verbosity = FALSE
)
structure_hits
## ----chemical-operator--------------------------------------------------------
# Chemical operator built from a descriptor string, matched by
# "fingerprint-similarity", plus the search service inferred for it.
atp_like_operator <- ChemicalOperator(
  descriptor = "O=P(O)(O)OP(=O)(O)OP(=O)(O)O",
  matching_criterion = "fingerprint-similarity"
)
atp_like_operator
infer_search_service(atp_like_operator)
## ----chemical-search, eval = TRUE---------------------------------------------
# Search with the wrapped chemical operator; up to five chemical components.
chemical_hits <- perform_search(
  search_operator = QueryNode(atp_like_operator),
  return_type = "CHEMICAL_COMPONENT",
  request_options = RequestOptions(result_start_index = 0, num_results = 5),
  verbosity = FALSE
)
chemical_hits
## ----operator-reference-------------------------------------------------------
# One example of each attribute-level operator.
# NOTE(review): despite its name, `exact_resolution` matches on "exptl.method",
# not on a resolution attribute. The name is kept because it is also the label
# in the printed list below.
exact_resolution <- ExactMatchOperator(
  attribute = "exptl.method",
  value = "X-RAY DIFFRACTION"
)
organism_inclusion <- InOperator(
  attribute = "rcsb_entity_source_organism.taxonomy_lineage.name",
  value = c("Homo sapiens", "Mus musculus")
)
# Word and phrase operators are given the same attribute and value so their
# printed forms can be compared side by side.
title_words <- ContainsWordsOperator(
  attribute = "struct.title",
  value = "protein kinase"
)
title_phrase <- ContainsPhraseOperator(
  attribute = "struct.title",
  value = "protein kinase"
)
resolution_cutoff <- ComparisonOperator(
  attribute = "rcsb_entry_info.resolution_combined",
  value = 2.0,
  comparison_type = "LESS"
)
resolution_window <- RangeOperator(
  attribute = "rcsb_entry_info.resolution_combined",
  from_value = 1.0,
  to_value = 2.5
)
doi_exists <- ExistsOperator("rcsb_primary_citation.pdbx_database_id_doi")
# Print every operator, labelled by its variable name.
list(
  exact_resolution = exact_resolution,
  organism_inclusion = organism_inclusion,
  title_words = title_words,
  title_phrase = title_phrase,
  resolution_cutoff = resolution_cutoff,
  resolution_window = resolution_window,
  doi_exists = doi_exists
)
## ----querynode-scoredresult---------------------------------------------------
# A single operator wrapped as a node, a three-operator AND group, and a
# hand-built scored result.
operator_node <- QueryNode(title_words)
composite_query <- QueryGroup(
  queries = list(title_words, resolution_window, doi_exists),
  logical_operator = "AND"
)
scored_example <- ScoredResult(entity_id = "4HHB", score = 0.98)
operator_node
composite_query
scored_example
## ----scored-search-results, eval = TRUE---------------------------------------
# Structure search that also returns scores, limited to three assemblies.
scored_structure_hits <- perform_search(
  search_operator = QueryNode(structure_operator),
  return_type = "ASSEMBLY",
  request_options = RequestOptions(result_start_index = 0, num_results = 3),
  return_with_scores = TRUE,
  verbosity = FALSE
)
scored_structure_hits
class(scored_structure_hits)
## ----query-composition-strategy, eval = TRUE----------------------------------
# Pattern: build small reusable operators first
title_filter <- ContainsPhraseOperator("struct.title", "protein kinase")
resolution_filter <- ComparisonOperator(
  "rcsb_entry_info.resolution_combined",
  2.5,
  "LESS_OR_EQUAL"
)
# Combine them only when the biological question is clear
query_graph <- QueryGroup(
  queries = list(title_filter, resolution_filter),
  logical_operator = "AND"
)
## ----query-search-variants, eval = TRUE---------------------------------------
# Each call pairs a search term with the query_type that interprets it.
# PubMed-linked structures
query_search(search_term = 27499440, query_type = "PubmedIdQuery")
# Organism/taxonomy search
organism_search <- query_search(
  search_term = "9606",
  query_type = "TreeEntityQuery"
)
head(organism_search)
# Experimental method search
experimental_search <- query_search(
  search_term = "X-RAY DIFFRACTION",
  query_type = "ExpTypeQuery"
)
head(experimental_search)
# Author search
query_search(search_term = "Kuriyan, J.", query_type = "AdvancedAuthorQuery")
# UniProt-linked entries
query_search(search_term = "P31749", query_type = "uniprot")
# PFAM-linked entries
pfam_search <- query_search(search_term = "PF00069", query_type = "pfam")
head(pfam_search)
## ----scan-params-example------------------------------------------------------
# Hand-written request options: paginate from 0 with 5 rows and do not return
# all hits.
custom_scan_params <- list(
  request_options = list(
    paginate = list(start = 0, rows = 5),
    return_all_hits = FALSE
  )
)
custom_scan_params
## ----query-search-scan-params, eval = TRUE------------------------------------
# Pass those options to query_search() through `scan_params`.
limited_kinase_hits <- query_search(
  search_term = "protein kinase",
  scan_params = custom_scan_params
)
limited_kinase_hits
## ----add-property-------------------------------------------------------------
# A plain property list and a list produced by add_property(), printed one
# after the other.
# NOTE(review): `base_properties` is never passed to add_property(); the call
# below receives only its own literal list. If `extended_properties` is meant
# to extend `base_properties`, confirm how add_property() accepts an existing
# list before changing this.
base_properties <- list(
rcsb_entry_info = c("resolution_combined"),
exptl = c("method")
)
extended_properties <- add_property(list(
rcsb_entry_info = c("molecular_weight", "resolution_combined"),
struct = c("title")
))
base_properties
extended_properties
## ----property-design-pattern--------------------------------------------------
# NOTE(review): `property_workflow` is assigned twice and the second call does
# not reference the first value, so only the result of the second
# add_property() call is printed and validated. Whether the first assignment
# is meant to be carried forward should be confirmed against add_property().
property_workflow <- add_property(list(
rcsb_id = list(),
struct = c("title"),
rcsb_entry_info = c("resolution_combined")
))
property_workflow <- add_property(list(
rcsb_entry_info = c("molecular_weight", "resolution_combined"),
exptl = c("method")
))
property_workflow
# Non-strict validation of the resulting list against ENTRY fields.
validate_properties(property_workflow, data_type = "ENTRY", strict = FALSE)
## ----ligand-component-properties----------------------------------------------
# Chemical-component properties to request.
ligand_properties <- list(
  rcsb_id = list(),
  chem_comp = c("id", "name", "formula", "formula_weight", "type"),
  rcsb_chem_comp_info = "initial_release_date"
)
ligand_properties
## ----chemical-component-fetch, eval = TRUE------------------------------------
# Fetch those properties for the first three chemical-search hits.
chemical_component_df <- data_fetcher(
  id = head(chemical_hits, n = 3),
  data_type = "CHEMICAL_COMPONENT",
  properties = ligand_properties,
  return_as_dataframe = TRUE
)
chemical_component_df
## ----ligand-object-helper, eval = TRUE----------------------------------------
# Wrap the data frame and extract the ligand table from it.
ligand_object <- as_rpdb_chemical_component(
  chemical_component_df,
  metadata = list(query = "ATP-like chemical components")
)
extract_ligand_table(ligand_object)
## ----describe-chemical, eval = TRUE-------------------------------------------
# Describe "ATP" and pull selected fields into one row; pluck() falls back to
# NA when a path is absent.
atp_description <- quietly(describe_chemical("ATP"))
dplyr::tibble(
  chem_id = "ATP",
  name = purrr::pluck(
    atp_description, "chem_comp", "name",
    .default = NA_character_
  ),
  formula = purrr::pluck(
    atp_description, "chem_comp", "formula",
    .default = NA_character_
  ),
  formula_weight = purrr::pluck(
    atp_description, "chem_comp", "formula_weight",
    .default = NA
  ),
  smiles = purrr::pluck(
    atp_description, "rcsb_chem_comp_descriptor", "smiles",
    .default = NA_character_
  )
)
## ----instance-level-examples, eval = TRUE-------------------------------------
# Both fetches request only rcsb_id, as data frames, with verbosity off.
# Polymer chain instance
polymer_instance <- data_fetcher(
  id = "4HHB.A",
  data_type = "POLYMER_ENTITY_INSTANCE",
  properties = list(rcsb_id = list()),
  return_as_dataframe = TRUE,
  verbosity = FALSE
)
# Non-polymer instance (heme in hemoglobin entry 4HHB)
nonpolymer_instance <- data_fetcher(
  id = "4HHB.E",
  data_type = "NONPOLYMER_ENTITY_INSTANCE",
  properties = list(rcsb_id = list()),
  return_as_dataframe = TRUE,
  verbosity = FALSE
)
polymer_instance
nonpolymer_instance
## ----low-level-url------------------------------------------------------------
# Build URLs for an entry and for a chemical component, then print them.
entry_url <- get_pdb_api_url("core/entry/", "4HHB")
chem_url <- get_pdb_api_url("core/chemcomp/", "ATP")
entry_url
chem_url
## ----low-level-lifecycle, eval = TRUE-----------------------------------------
# Manual request lifecycle
# Build the URL, send the request, check it for errors, then parse it as JSON.
url <- get_pdb_api_url("core/entry/", "4HHB")
response <- send_api_request(url, verbosity = FALSE)
handle_api_errors(response, url)
payload <- parse_response(response, format = "json")
## ----low-level-http, eval = TRUE----------------------------------------------
# Same lifecycle as a manual request, applied to the entry URL built earlier:
# send, check for errors, parse as JSON.
entry_response <- send_api_request(entry_url, verbosity = FALSE)
handle_api_errors(entry_response, entry_url)
entry_payload <- parse_response(entry_response, format = "json")
# Show up to five top-level names. head() stops at the end of the vector,
# whereas indexing with [1:5] would pad the output with NA if the payload had
# fewer than five names.
head(names(entry_payload), 5)
## ----graphql-low-level, eval = TRUE-------------------------------------------
# Build a small query (rcsb_id and struct title) for the first two hits, send
# it via search_graphql(), and show the top two levels of the response.
mini_graphql <- generate_json_query(
  ids = kinase_entry_ids[1:2],
  data_type = "ENTRY",
  properties = list(rcsb_id = list(), struct = "title")
)
mini_graphql_response <- search_graphql(list(query = mini_graphql))
str(mini_graphql_response, max.level = 2)
## ----contracts-live, eval = TRUE----------------------------------------------
# Classes returned by the two search entry points, with and without scores.
list(
  query_search_class = class(query_search("kinase")),
  perform_search_class = class(
    perform_search(DefaultOperator("kinase"), verbosity = FALSE)
  ),
  perform_search_scores_class = class(
    perform_search(
      DefaultOperator("kinase"),
      return_with_scores = TRUE,
      verbosity = FALSE
    )
  )
)
## ----fetch-contracts, eval = TRUE---------------------------------------------
# The same fetch twice, differing only in `return_as_dataframe`, to compare
# the classes of the two results.
raw_entry_response <- data_fetcher(
  id = kinase_entry_ids[1:2],
  data_type = "ENTRY",
  properties = list(rcsb_id = list()),
  return_as_dataframe = FALSE
)
tidy_entry_response <- data_fetcher(
  id = kinase_entry_ids[1:2],
  data_type = "ENTRY",
  properties = list(rcsb_id = list()),
  return_as_dataframe = TRUE
)
class(raw_entry_response)
class(tidy_entry_response)
## ----object-contracts, eval = TRUE--------------------------------------------
# Classes of the object wrappers, and the field names of the batch provenance.
list(
  entry_object_class = class(as_rpdb_entry(kinase_metadata)),
  assembly_object_class = class(as_rpdb_assembly(kinase_assemblies)),
  polymer_object_class = class(as_rpdb_polymer_entity(kinase_polymer_metadata)),
  structure_object_class = class(as_rpdb_structure(kinase_structure)),
  batch_provenance_names = names(attr(kinase_batch, "provenance"))
)
## ----object-methods-local-----------------------------------------------------
# Single-row entry object built locally, printed explicitly and as a tibble.
local_entry_object <- as_rpdb_entry(
  data.frame(
    rcsb_id = "4HHB",
    method = "X-RAY DIFFRACTION",
    resolution_combined = "1.74",
    stringsAsFactors = FALSE
  ),
  metadata = list(source = "local method demo")
)
print(local_entry_object)
dplyr::as_tibble(local_entry_object)
## ----defensive-patterns-------------------------------------------------------
# Two calls given deliberately bad input. Only conditions of class
# "rPDBapi_error_invalid_input" are caught; the handler returns the condition
# object so its class and message can be inspected below.
invalid_property_result <- tryCatch(
  validate_properties(
    properties = list(unknown_field = "x"),
    data_type = "ENTRY",
    strict = TRUE
  ),
  rPDBapi_error_invalid_input = function(e) e
)
invalid_fetch_result <- tryCatch(
  data_fetcher(
    id = character(0),
    data_type = "ENTRY",
    properties = list(rcsb_id = list())
  ),
  rPDBapi_error_invalid_input = function(e) e
)
list(
  invalid_property_class = class(invalid_property_result),
  invalid_property_message = conditionMessage(invalid_property_result),
  invalid_fetch_class = class(invalid_fetch_result),
  invalid_fetch_message = conditionMessage(invalid_fetch_result)
)
## ----export-reference, results = "asis", echo=FALSE---------------------------
# Reference table with one row per function: column `Function` holds the name
# and column `Role` a one-line description. Each name is written next to its
# description so the two columns cannot drift out of step; local() keeps the
# helper vector out of the global workspace.
export_reference <- local({
  role_by_function <- c(
    "query_search" = "High-level convenience search helper",
    "perform_search" = "Operator-based search engine",
    "DefaultOperator" = "Full-text search operator",
    "ExactMatchOperator" = "Exact attribute match operator",
    "InOperator" = "Set-membership operator",
    "ContainsWordsOperator" = "Word containment operator",
    "ContainsPhraseOperator" = "Phrase containment operator",
    "ComparisonOperator" = "Numeric/date comparison operator",
    "RangeOperator" = "Range filter operator",
    "ExistsOperator" = "Attribute existence operator",
    "SequenceOperator" = "Sequence similarity search operator",
    "autoresolve_sequence_type" = "Automatic DNA/RNA/protein detection",
    "SeqMotifOperator" = "Sequence motif search operator",
    "StructureOperator" = "Structure similarity search operator",
    "ChemicalOperator" = "Chemical descriptor search operator",
    "QueryNode" = "Wrap one operator as a query node",
    "QueryGroup" = "Combine nodes with AND/OR logic",
    "RequestOptions" = "Pagination and sorting controls",
    "ScoredResult" = "Represent a scored hit",
    "infer_search_service" = "Infer backend service from operator",
    "infer_id_type" = "Infer identifier level from an ID string",
    "parse_rcsb_id" = "Parse an identifier into structured components",
    "build_entry_id" = "Normalize or build entry identifiers",
    "build_assembly_id" = "Build assembly identifiers",
    "build_entity_id" = "Build entity identifiers",
    "build_instance_id" = "Build instance or chain identifiers",
    "add_property" = "Merge/extend GraphQL property lists",
    "list_rcsb_fields" = "List known retrievable fields by data type",
    "search_rcsb_fields" = "Search the built-in field registry",
    "validate_properties" = "Validate a property list against the field registry",
    "generate_json_query" = "Build a GraphQL query string",
    "search_graphql" = "Low-level GraphQL request helper",
    "fetch_data" = "Normalize validated GraphQL payloads",
    "return_data_as_dataframe" = "Flatten nested payloads into data frames",
    "data_fetcher" = "High-level metadata fetcher",
    "data_fetcher_batch" = "Batch metadata fetcher with retry and provenance",
    "cache_info" = "Inspect batch-cache contents",
    "clear_rpdbapi_cache" = "Clear on-disk cache entries",
    "get_info" = "Retrieve full entry metadata",
    "find_results" = "Extract one field across search hits",
    "find_papers" = "Extract primary citation titles",
    "describe_chemical" = "Retrieve ligand/chemical-component details",
    "get_fasta_from_rcsb_entry" = "Retrieve FASTA sequences",
    "get_pdb_file" = "Download and parse structure files",
    "get_pdb_api_url" = "Build REST endpoint URLs",
    "send_api_request" = "Send low-level GET/POST requests",
    "handle_api_errors" = "Check HTTP status and stop on error",
    "parse_response" = "Parse JSON or text responses",
    "as_rpdb_entry" = "Wrap entry data in a typed object",
    "as_rpdb_assembly" = "Wrap assembly data in a typed object",
    "as_rpdb_polymer_entity" = "Wrap polymer-entity data in a typed object",
    "as_rpdb_chemical_component" = "Wrap chemical-component data in a typed object",
    "as_rpdb_structure" = "Wrap structure data in a typed object",
    "summarize_entries" = "Summarize entry-level metadata",
    "summarize_assemblies" = "Summarize assembly-level metadata",
    "extract_taxonomy_table" = "Extract taxonomy-focused columns",
    "extract_ligand_table" = "Extract ligand-focused columns",
    "extract_calpha_coordinates" = "Extract C-alpha coordinates",
    "join_structure_sequence" = "Join sequence summaries to chain coordinates"
  )
  data.frame(
    Function = names(role_by_function),
    Role = unname(role_by_function),
    stringsAsFactors = FALSE
  )
})
# Render the table with both columns left-aligned.
knitr::kable(export_reference, align = c("l", "l"))
## ----every-export-pattern, eval = TRUE, echo=TRUE-----------------------------
# One minimal call per exported function. Results that several examples need
# (the rcsb_id-only query and its fetched payload, the entry URL, the FASTA
# sequences and the parsed structure file) are obtained once and reused, so
# the same remote resource is not requested repeatedly.
# Search helpers
query_search("4HHB")
perform_search(DefaultOperator("4HHB"), verbosity = FALSE)
# Text and attribute operators
DefaultOperator("kinase")
ExactMatchOperator("exptl.method", "X-RAY DIFFRACTION")
InOperator(
  "rcsb_entity_source_organism.taxonomy_lineage.name",
  c("Homo sapiens", "Mus musculus")
)
ContainsWordsOperator("struct.title", "protein kinase")
ContainsPhraseOperator("struct.title", "protein kinase")
ComparisonOperator("rcsb_entry_info.resolution_combined", 2.0, "LESS")
RangeOperator("rcsb_entry_info.resolution_combined", 1.0, 2.5)
ExistsOperator("rcsb_primary_citation.pdbx_database_id_doi")
# Specialized operators
SequenceOperator("MVLSPADKTNVKAAW", sequence_type = "PROTEIN")
autoresolve_sequence_type("ATGCGTACGTAGC")
SeqMotifOperator("[LIV][ACDEFGHIKLMNPQRSTVWY]K[GST]", "PROTEIN", "REGEX")
StructureOperator("4HHB", assembly_id = 1, search_mode = "RELAXED_SHAPE_MATCH")
ChemicalOperator("C1=CC=CC=C1", matching_criterion = "graph-strict")
# Query composition
QueryNode(DefaultOperator("kinase"))
QueryGroup(
  list(DefaultOperator("kinase"), ExistsOperator("rcsb_primary_citation.title")),
  "AND"
)
RequestOptions(result_start_index = 0, num_results = 10)
ScoredResult("4HHB", 0.98)
infer_search_service(StructureOperator("4HHB"))
infer_id_type(c("4HHB", "4HHB-1", "4HHB_1", "4HHB.A", "ATP"))
parse_rcsb_id("4HHB-1")
build_entry_id("4HHB")
build_assembly_id("4HHB", 1)
build_entity_id("4HHB", 1)
build_instance_id("4HHB", "A")
# Metadata helpers
add_property(list(rcsb_entry_info = c("resolution_combined")))
list_rcsb_fields("ENTRY")
search_rcsb_fields("resolution", data_type = "ENTRY")
validate_properties(
  list(rcsb_id = list(), rcsb_entry_info = c("resolution_combined")),
  data_type = "ENTRY",
  strict = TRUE
)
generate_json_query(c("4HHB"), "ENTRY", list(rcsb_id = list(), struct = c("title")))
# Build the rcsb_id-only query once; fetch its payload once and reuse it for
# the data-frame conversion.
demo_id_query <- generate_json_query(c("4HHB"), "ENTRY", list(rcsb_id = list()))
search_graphql(list(query = demo_id_query))
demo_fetch <- fetch_data(demo_id_query, "ENTRY", "4HHB")
demo_fetch
return_data_as_dataframe(demo_fetch, "ENTRY", "4HHB")
data_fetcher("4HHB", "ENTRY", list(rcsb_id = list(), struct = c("title")))
data_fetcher_batch(
  c("4HHB", "1CRN"),
  "ENTRY",
  list(rcsb_id = list(), struct = c("title")),
  batch_size = 1,
  cache = FALSE
)
# NOTE(review): both cache helpers are called without a cache directory, so
# they act on whatever location the package uses by default - confirm that
# clearing it from this document is intended.
cache_info()
clear_rpdbapi_cache()
quietly(get_info("4HHB"))
quietly(find_results("4HHB", field = "struct_keywords"))
quietly(find_papers("4HHB", max_results = 3))
describe_chemical("ATP")
# Retrieved once; reused by join_structure_sequence() at the end.
demo_fasta <- get_fasta_from_rcsb_entry("4HHB")
demo_fasta
# Files and low-level HTTP
# Retrieved once; reused by the structure-based helpers further down.
demo_structure <- get_pdb_file("4HHB", filetype = "cif", verbosity = FALSE)
demo_structure
demo_entry_url <- get_pdb_api_url("core/entry/", "4HHB")
demo_entry_url
resp <- send_api_request(demo_entry_url, verbosity = FALSE)
handle_api_errors(resp, demo_entry_url)
parse_response(resp, format = "json")
# Object wrappers and analysis helpers
as_rpdb_entry(data.frame(rcsb_id = "4HHB"))
as_rpdb_assembly(data.frame(rcsb_id = "4HHB-1"))
as_rpdb_polymer_entity(data.frame(rcsb_id = "4HHB_1"))
as_rpdb_chemical_component(data.frame(rcsb_id = "ATP"))
as_rpdb_structure(demo_structure)
summarize_entries(data.frame(method = "X-RAY DIFFRACTION", resolution_combined = "1.8"))
summarize_assemblies(data.frame(oligomeric_count = "2", symbol = "C2"))
extract_taxonomy_table(data.frame(rcsb_id = "4HHB_1", ncbi_taxonomy_id = "9606"))
extract_ligand_table(data.frame(rcsb_id = "ATP", formula_weight = "507.18"))
extract_calpha_coordinates(demo_structure)
join_structure_sequence(demo_structure, demo_fasta)
## ----id-format-table, results = "asis", echo=FALSE----------------------------
# Reference table of identifier formats. Each record is written as one row
# (type, typical ID format, typical use) so related values sit side by side.
id_reference <- as.data.frame(
  matrix(
    c(
      "ENTRY",
      "4-character PDB ID, e.g. 4HHB",
      "Whole-structure metadata",

      "ASSEMBLY",
      "Entry plus assembly ID, e.g. 4HHB-1",
      "Biological assembly and symmetry",

      "POLYMER_ENTITY",
      "Entry plus entity ID, e.g. 4HHB_1",
      "Entity-level taxonomy or sequence annotations",

      "BRANCHED_ENTITY",
      "Entry plus branched entity ID",
      "Glycan/branched entity records",

      "NONPOLYMER_ENTITY",
      "Entry plus nonpolymer entity ID, e.g. 3PQR_5",
      "Ligand records within structures",

      "POLYMER_ENTITY_INSTANCE",
      "Instance or chain-level identifier, endpoint-specific",
      "Chain-specific annotations",

      "BRANCHED_ENTITY_INSTANCE",
      "Instance-level identifier, endpoint-specific",
      "Branched entity instance records",

      "NONPOLYMER_ENTITY_INSTANCE",
      "Instance-level identifier, endpoint-specific",
      "Ligand instance records",

      "CHEMICAL_COMPONENT",
      "Chemical component ID, e.g. ATP",
      "Ligand chemistry and descriptors"
    ),
    ncol = 3,
    byrow = TRUE,
    dimnames = list(
      NULL,
      c("Data_or_Return_Type", "Typical_ID_Format", "Typical_Use")
    )
  ),
  stringsAsFactors = FALSE
)

# Left-align every column of the rendered table.
knitr::kable(id_reference, align = rep("l", ncol(id_reference)))
## ----return-contract-table, results = "asis", echo=FALSE----------------------
# Reference table mapping each listed call to the class name and meaning
# recorded for it. The three columns are prepared inside local() so that only
# contract_reference is left in the calling environment.
contract_reference <- local({
  function_calls <- c(
    "query_search(return_type = 'entry')",
    "query_search(other return_type)",
    "perform_search()",
    "perform_search(return_with_scores = TRUE)",
    "perform_search(return_raw_json_dict = TRUE)",
    "fetch_data()",
    "data_fetcher_batch(return_as_dataframe = TRUE)",
    "data_fetcher(return_as_dataframe = TRUE)",
    "data_fetcher(return_as_dataframe = FALSE)",
    "as_rpdb_entry()",
    "as_rpdb_assembly()",
    "as_rpdb_polymer_entity()",
    "as_rpdb_chemical_component()",
    "as_rpdb_structure()"
  )

  return_classes <- c(
    "rPDBapi_query_ids",
    "rPDBapi_query_response",
    "rPDBapi_search_ids",
    "rPDBapi_search_scores",
    "rPDBapi_search_raw_response",
    "rPDBapi_fetch_response",
    "rPDBapi_dataframe",
    "rPDBapi_dataframe",
    "rPDBapi_fetch_response",
    "rPDBapi_entry",
    "rPDBapi_assembly",
    "rPDBapi_polymer_entity",
    "rPDBapi_chemical_component",
    "rPDBapi_structure"
  )

  meanings <- c(
    "Identifier vector from query_search()",
    "Parsed query_search payload",
    "Identifier vector from perform_search()",
    "Scored search results",
    "Raw JSON-like search payload",
    "Validated GraphQL fetch payload",
    "Flattened batch result with provenance metadata",
    "Flattened analysis-ready table",
    "Nested validated fetch payload",
    "Typed entry wrapper around retrieved data",
    "Typed assembly wrapper around retrieved data",
    "Typed polymer-entity wrapper around retrieved data",
    "Typed chemical-component wrapper around retrieved data",
    "Typed structure wrapper around retrieved data"
  )

  data.frame(
    Function = function_calls,
    Return_Class = return_classes,
    Meaning = meanings,
    stringsAsFactors = FALSE
  )
})

# Left-align every column of the rendered table.
knitr::kable(contract_reference, align = rep("l", ncol(contract_reference)))
## ----error-guidance-----------------------------------------------------------
# Reference table pairing each failure scenario with the condition class or
# function listed as its typical source. The pairs are written as a named
# vector (scenario = source) and then split into the two table columns.
error_guidance <- local({
  source_by_scenario <- c(
    "Malformed search response" = "rPDBapi_error_malformed_response",
    "Unsupported return-type mapping" = "rPDBapi_error_unsupported_mapping",
    "Invalid input to search/fetch helper" = "rPDBapi_error_invalid_input",
    "Unknown property or subproperty in strict mode" =
      "validate_properties() / generate_json_query()",
    "Batch retrieval failure after retries" = "data_fetcher_batch()",
    "HTTP failure" = "handle_api_errors() / send_api_request()",
    "Response parsing failure" = "parse_response()"
  )

  data.frame(
    Scenario = names(source_by_scenario),
    # Drop the names so they are not picked up as row names.
    Typical_Class_or_Source = unname(source_by_scenario),
    stringsAsFactors = FALSE
  )
})

# Left-align both columns of the rendered table.
knitr::kable(error_guidance, align = rep("l", ncol(error_guidance)))
## ----reproducibility----------------------------------------------------------
# Collect the settings and example objects of this session in one list.
# kinase_query, entry_properties and kinase_batch are expected to exist
# already; they are not defined in this chunk.
analysis_manifest <- local({
  example_entry <- "4HHB"

  # Identifiers produced by the build_*_id() helpers for the example entry.
  identifier_examples <- list(
    entry = build_entry_id(example_entry),
    assembly = build_assembly_id(example_entry, 1),
    entity = build_entity_id(example_entry, 1),
    instance = build_instance_id(example_entry, "A")
  )

  list(
    live_examples = TRUE,
    package_version = as.character(utils::packageVersion("rPDBapi")),
    query = kinase_query,
    requested_entry_fields = entry_properties,
    # Falls back to FALSE when the option has not been set.
    strict_property_validation = getOption(
      "rPDBapi.strict_property_validation",
      default = FALSE
    ),
    built_ids = identifier_examples,
    batch_provenance_example = attr(kinase_batch, "provenance")
  )
})

# Show the structure of the manifest down to two levels of nesting.
str(analysis_manifest, max.level = 2)
## ----session-info-------------------------------------------------------------
# Print the R session details for this run.
utils::sessionInfo()
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.