inst/doc/protein_structure_workflow.R

## ----include = FALSE----------------------------------------------------------
build_vignette_on_cran <- identical(Sys.getenv("BUILD_VIGNETTE"), "true")
test_protti <- identical(Sys.getenv("TEST_PROTTI"), "true") & build_vignette_on_cran
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  out.width = "100%"
)

## ----CRAN_comment, message=FALSE, warning=FALSE, echo=FALSE-------------------
if (build_vignette_on_cran == FALSE) {
  print("!!! IMPORTANT !!!")
  print("This Vignette has not been built completely on CRAN due to size limitations.")
  print("Please check the correct version here: ")
  print("https://jpquast.github.io/protti/articles/protein_structure_workflow.html")
}

## ----load_packages, eval = test_protti, warning = FALSE, message = FALSE------
# # Load packages
# library(protti)
# library(dplyr)
# library(magrittr)
# library(stringr)
# library(tidyr)
# library(ggplot2)

## ----load_data, eval = test_protti, warning = FALSE---------------------------
# utils::data("ptsi_pgk")

## ----use_read_protti, eval=FALSE----------------------------------------------
# # Load data
# your_data <- read_protti("your_differential_abundance_data.csv")

## ----prepare_data, eval = test_protti, warning = FALSE------------------------
# # Input UniProt IDs
# uniprot_ids <- unique(ptsi_pgk$pg_protein_accessions)
# 
# # Fetch UniProt information
# uniprot_information <- fetch_uniprot(
#   uniprot_ids = uniprot_ids,
#   columns = c("sequence", "xref_pdb")
# )
# 
# # Add UniProt information and find peptide positions
# ptsi_pgk_annotated <- ptsi_pgk %>%
#   left_join(uniprot_information, by = c("pg_protein_accessions" = "accession")) %>%
#   find_peptide(protein_sequence = sequence, peptide_sequence = pep_stripped_sequence)

## ----extract_pdb_info, eval = test_protti, warning = FALSE--------------------
# # Extract PDB IDs from UniProt information
# ptsi_pgk_pdb_ids <- ptsi_pgk_annotated %>%
#   distinct(pg_protein_accessions, xref_pdb) %>%
#   mutate(pdb_id = str_split(xref_pdb, pattern = ";")) %>%
#   unnest(pdb_id) %>%
#   filter(pdb_id != "")
# 
# # Fetch pdb information
# ptsi_pgk_pdb_information <- fetch_pdb(pdb_ids = unique(ptsi_pgk_pdb_ids$pdb_id))

## ----filter_structures, eval = test_protti, warning = FALSE-------------------
# filtered_structures <- ptsi_pgk_pdb_information %>%
#   filter(
#     experimental_method == "X-ray",
#     resolution_combined <= 3
#   ) %>%
#   group_by(reference_database_accession) %>%
#   filter(length == max(length)) %>%
#   ungroup()

## ----fetch_pdb_structure, eval = test_protti, warning = FALSE-----------------
# # Input PDB IDs
# pdb_ids <- unique(filtered_structures$pdb_ids) # "1ZMR", "2HWG"
# 
# # Fetch atom level structural information
# ptsi_pgk_structure_information <- fetch_pdb_structure(
#   pdb_ids = pdb_ids,
#   return_data_frame = TRUE
# )

## ----fetch_alphafold_prediction, eval = test_protti, warning = FALSE----------
# # Fetch atom level structural prediction information from AlphaFold
# ptsi_pgk_prediction_information <- fetch_alphafold_prediction(
#   uniprot_ids = uniprot_ids,
#   return_data_frame = TRUE
# )
# 
# # Example for fetching all predictions for Methanocaldococcus jannaschii
# # mj_predictions <- fetch_alphafold_prediction(organism_name = "Methanocaldococcus jannaschii")

## ----alphafold_domain_prediction, eval = test_protti, warning = FALSE---------
# # Fetch aligned errors
# aligned_error <- fetch_alphafold_aligned_error(
#   uniprot_ids = uniprot_ids,
#   error_cutoff = 4
# )
# 
# # Predict protein domains with graph_resolution of 1
# af_domains_res_1 <- predict_alphafold_domain(
#   pae_list = aligned_error,
#   return_data_frame = TRUE,
#   graph_resolution = 1
# ) # Default
# 
# # Predict protein domains with graph_resolution of 0.5
# af_domains_res_05 <- predict_alphafold_domain(
#   pae_list = aligned_error,
#   return_data_frame = TRUE,
#   graph_resolution = 0.5
# )

## ----show_model_P08839_res1, eval = test_protti, echo=FALSE, warning=FALSE----
# # This code won't be seen in the Vignette
# if (!is.null(af_domains_res_1)) {
#   # Fetch structure file for model
#   protti:::try_query("https://alphafold.ebi.ac.uk/files/AF-P08839-F1-model_v6.pdb",
#     type = "text/tab-separated-values",
#     col_names = FALSE,
#     quote = "",
#     show_col_types = FALSE,
#     progress = FALSE
#   ) %>%
#     readr::write_tsv(
#       file = paste0(tempdir(), "/AF-P08839-F1-model_v6.pdb"),
#       quote = "none",
#       escape = "none",
#       col_names = FALSE,
#       progress = FALSE
#     )
# 
#   # Load the r3dmol package
#   library(r3dmol)
# 
#   # Extract domain positions
# 
#   domain1 <- af_domains_res_1 %>%
#     filter(accession == "P08839" & domain == 1) %>%
#     pull(residue)
# 
#   domain2 <- af_domains_res_1 %>%
#     filter(accession == "P08839" & domain == 2) %>%
#     pull(residue)
# 
#   domain3 <- af_domains_res_1 %>%
#     filter(accession == "P08839" & domain == 3) %>%
#     pull(residue)
# 
#   # Create model
#   r3dmol() %>%
#     m_add_model(data = paste0(tempdir(), "/AF-P08839-F1-model_v6.pdb"), format = "pdb") %>%
#     m_set_style(style = m_style_cartoon()) %>%
#     m_add_style(
#       style = m_style_cartoon(color = "#8047d6"),
#       sel = m_sel(resi = domain1)
#     ) %>%
#     m_add_style(
#       style = m_style_cartoon(color = "#96d647"),
#       sel = m_sel(resi = domain2)
#     ) %>%
#     m_add_style(
#       style = m_style_cartoon(color = "#FF7276"),
#       sel = m_sel(resi = domain3)
#     ) %>%
#     m_zoom_to()
# }

## ----show_model_P08839_res05, eval = test_protti, echo=FALSE, warning=FALSE----
# # This code won't be seen in the Vignette
# if (!is.null(af_domains_res_1)) {
#   # Extract domain positions
# 
#   domain1 <- af_domains_res_05 %>%
#     filter(accession == "P08839" & domain == 1) %>%
#     pull(residue)
# 
#   domain2 <- af_domains_res_05 %>%
#     filter(accession == "P08839" & domain == 2) %>%
#     pull(residue)
# 
#   domain3 <- af_domains_res_05 %>%
#     filter(accession == "P08839" & domain == 3) %>%
#     pull(residue)
# 
#   # Create model
#   r3dmol() %>%
#     m_add_model(data = paste0(tempdir(), "/AF-P08839-F1-model_v6.pdb"), format = "pdb") %>%
#     m_set_style(style = m_style_cartoon()) %>%
#     m_add_style(
#       style = m_style_cartoon(color = "#8047d6"),
#       sel = m_sel(resi = domain1)
#     ) %>%
#     m_add_style(
#       style = m_style_cartoon(color = "#96d647"),
#       sel = m_sel(resi = domain2)
#     ) %>%
#     m_add_style(
#       style = m_style_cartoon(color = "#FF7276"),
#       sel = m_sel(resi = domain3)
#     ) %>%
#     m_zoom_to()
# }

## ----find_peptide, eval = test_protti, warning = FALSE------------------------
# ptsi_pgk_peptide_structure_positions <- find_peptide_in_structure(
#   peptide_data = ptsi_pgk_annotated,
#   peptide = pep_stripped_sequence,
#   start = start,
#   end = end,
#   uniprot_id = pg_protein_accessions,
#   pdb_data = filtered_structures,
#   retain_columns = c(eg_precursor_id, diff, adj_pval)
# )

## ----create_structure_contact_map, eval = test_protti, warning = FALSE, fig.width = 10, fig.height = 7, fig.align = "center"----
# # Filter data for significant peptides.
# significant_peptides <- ptsi_pgk_peptide_structure_positions %>%
#   filter(abs(diff) > 2, adj_pval <= 0.01)
# 
# # Create a structure contact maps
# contact_map <- create_structure_contact_map(
#   data = significant_peptides,
#   id = pdb_ids,
#   chain = auth_asym_id,
#   auth_seq_id = auth_seq_id,
#   distance_cutoff = 10,
#   pdb_model_number_selection = c(0, 1),
#   return_min_residue_distance = TRUE
# )
# 
# # This is a helper function for the plot.
# # It allows the display of integers on the axis.
# integer_breaks <- function(n = 5, ...) {
#   fxn <- function(x) {
#     breaks <- floor(pretty(x, n, ...))
#     names(breaks) <- attr(breaks, "labels")
#     breaks
#   }
#   return(fxn)
# }
# 
# # Plot structure contact maps
# # 1ZMR
# contact_map[["1ZMR"]] %>% # Extract data frame from list
#   mutate(chain_combinations = paste0("chain_", label_asym_id_var1, "_vs_chain_", label_asym_id_var2)) %>%
#   ggplot(aes(x = label_seq_id_var1, y = label_seq_id_var2, fill = min_distance_residue)) +
#   geom_tile() +
#   scale_y_continuous(breaks = integer_breaks()) +
#   scale_x_continuous(breaks = integer_breaks()) +
#   facet_wrap(~chain_combinations, scale = "free") +
#   labs(title = "Structure contact map 1ZMR") +
#   theme_bw()
# 
# # 2HWG
# contact_map[["2HWG"]] %>% # Extract data frame from list
#   mutate(chain_combinations = paste0("chain_", label_asym_id_var1, "_vs_chain_", label_asym_id_var2)) %>%
#   ggplot(aes(x = label_seq_id_var1, y = label_seq_id_var2, fill = min_distance_residue)) +
#   geom_tile() +
#   scale_y_continuous(breaks = integer_breaks()) +
#   scale_x_continuous(breaks = integer_breaks()) +
#   labs(title = "Structure contact map 2HWG") +
#   facet_wrap(~chain_combinations, scale = "free") +
#   theme_bw()

## ----peptide_mapping, eval = test_protti, warning = FALSE---------------------
# ptsi_pgk_peptide_structure_positions %>%
#   mutate(map_value = ifelse(eg_precursor_id %in% significant_peptides$eg_precursor_id,
#     100,
#     0
#   )) %>%
#   map_peptides_on_structure(
#     uniprot_id = pg_protein_accessions,
#     pdb_id = pdb_ids,
#     chain = auth_asym_id,
#     auth_seq_id = auth_seq_id,
#     map_value = map_value,
#     file_format = ".pdb",
#     export_location = tempdir() # change to a location of your choice
#   )

## ----3d_structure_mapping, eval = test_protti, echo=TRUE, warning=FALSE-------
# # Install the r3dmol package if it is not installed
# # install.packages("r3dmol")
# 
# # Load the r3dmol package
# library(r3dmol)
# 
# # Create structure
# r3dmol() %>%
#   m_add_model(data = paste0(tempdir(), "/1ZMR_P0A799.pdb"), format = "pdb") %>%
#   m_set_style(style = m_style_cartoon(
#     colorfunc = "
#         function(atom) {
#           if (atom.b == 50) {return '#90EE90'};
#           if (atom.b == 100) {return '#FF7276'};
#           if (atom.b == 0) {return 'white'};
#           return 'white';
#         }"
#   )) %>%
#   m_zoom_to()

## ----peptide_map_2hwg, eval = test_protti, echo = FALSE, fig.align = "center", fig.cap = "**ptsI (2HWG) with mapped significantly changing peptides.**"----
# knitr::include_graphics("figures/peptide_map_2hwg.png")

## ----interaction_2hwg, eval = test_protti, echo = FALSE, fig.align = "center", out.width = "60%", fig.cap = "**ptsI (2HWG) binding interface of significantly chaging peptide.**"----
# knitr::include_graphics("figures/interaction_2hwg.png")

## ----peptide_map_1zmr, eval = test_protti, echo = FALSE, fig.align = "center", fig.cap = "**pgk (1ZMR) with mapped significantly changing peptides.**"----
# knitr::include_graphics("figures/peptide_map_1zmr.png")

## ----calculate_and_map_scores, eval = test_protti-----------------------------
# # Calculate the amino acid score
# amino_acid_score <- calculate_aa_scores(
#   data = ptsi_pgk_peptide_structure_positions,
#   protein = pg_protein_accessions,
#   diff = diff,
#   adj_pval = adj_pval,
#   start_position = start,
#   end_position = end,
#   retain_columns = c(pdb_ids, auth_asym_id)
# )
# 
# # Find amino acid positions in the structure
# ptsi_pgk_amino_acid_structure_positions <- find_peptide_in_structure(
#   peptide_data = amino_acid_score,
#   peptide = residue,
#   start = residue,
#   end = residue,
#   uniprot_id = pg_protein_accessions,
#   pdb_data = filtered_structures,
#   retain_columns = c(amino_acid_score)
# )
# 
# # Map the score on structure
# map_peptides_on_structure(
#   peptide_data = ptsi_pgk_amino_acid_structure_positions,
#   uniprot_id = pg_protein_accessions,
#   pdb_id = pdb_ids,
#   chain = auth_asym_id,
#   auth_seq_id = auth_seq_id,
#   map_value = amino_acid_score,
#   file_format = ".pdb",
#   export_location = tempdir()
# )

## ----score_3d_structure_mapping, eval = test_protti, echo=TRUE, warning=FALSE----
# # create a color gradient with 101 colors
# color_gradient <- paste0(
#   '"',
#   paste(colorRampPalette(c("white", "#90EE90", "#FF7276"))(101),
#     collapse = '", "'
#   ),
#   '"'
# )
# 
# # create structure
# r3dmol() %>%
#   m_add_model(data = paste0(tempdir(), "/2HWG_P08839.pdb"), format = "pdb") %>%
#   m_set_style(style = m_style_cartoon(
#     colorfunc = paste0("
#         function(atom) {
#           const color = [", color_gradient, "]
#           return color[Math.round(atom.b)]
#         }")
#   )) %>%
#   m_zoom_to()

## ----peptide_map_2hwg_score, eval = test_protti, echo = FALSE, fig.align = "center", fig.cap = "**ptsI (2HWG) with mapped amino acid scores.**"----
# knitr::include_graphics("figures/peptide_map_2hwg_score.png")

## ----peptide_map_1zmr_score, eval = test_protti, echo = FALSE, fig.align = "center", fig.cap = "**pgk (1ZMR) with mapped amino acid scores.**"----
# knitr::include_graphics("figures/peptide_map_1zmr_score.png")

Try the protti package in your browser

Any scripts or data that you put into this service are public.

protti documentation built on Jan. 14, 2026, 9:08 a.m.