inst/doc/protein_structure_workflow.R

## ---- include = FALSE---------------------------------------------------------
build_vignette_on_cran <- identical(Sys.getenv("BUILD_VIGNETTE"), "true")
test_protti <- identical(Sys.getenv("TEST_PROTTI"), "true") & build_vignette_on_cran
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  out.width = "100%"
)

## ----CRAN_comment, message=FALSE, warning=FALSE, echo=FALSE-------------------
if (build_vignette_on_cran == FALSE){
  print("!!! IMPORTANT !!!")
  print("This Vignette has not been built completely on CRAN due to size limitations.")
  print("Please check the correct version here: ")
  print("https://jpquast.github.io/protti/articles/protein_structure_workflow.html")
}

## ----load_packages, eval = test_protti, warning = FALSE, message = FALSE------
#  # Load packages
#  library(protti)
#  library(dplyr)
#  library(magrittr)
#  library(stringr)
#  library(tidyr)
#  library(ggplot2)

## ----load_data, eval = test_protti, warning = FALSE---------------------------
#  utils::data("ptsi_pgk")

## ----use_read_protti, eval=FALSE----------------------------------------------
#  # Load data
#  your_data <- read_protti("your_differential_abundance_data.csv")

## ----prepare_data, eval = test_protti, warning = FALSE------------------------
#  # Input UniProt IDs
#  uniprot_ids <- unique(ptsi_pgk$pg_protein_accessions)
#  
#  # Fetch UniProt information
#  uniprot_information <- fetch_uniprot(uniprot_ids = uniprot_ids,
#                                       columns = c("sequence", "xref_pdb"))
#  
#  # Add UniProt information and find peptide positions
#  ptsi_pgk_annotated <- ptsi_pgk %>%
#    left_join(uniprot_information, by = c("pg_protein_accessions" = "accession")) %>%
#    find_peptide(protein_sequence = sequence, peptide_sequence = pep_stripped_sequence)

## ----extract_pdb_info, eval = test_protti, warning = FALSE--------------------
#  # Extract PDB IDs from UniProt information
#  ptsi_pgk_pdb_ids <- ptsi_pgk_annotated %>%
#    distinct(pg_protein_accessions, xref_pdb) %>%
#    mutate(pdb_id = str_split(xref_pdb, pattern = ";")) %>%
#    unnest(pdb_id) %>%
#    filter(pdb_id != "")
#  
#  # Fetch pdb information
#  ptsi_pgk_pdb_information <- fetch_pdb(pdb_ids = unique(ptsi_pgk_pdb_ids$pdb_id))

## ----filter_structures, eval = test_protti, warning = FALSE-------------------
#  filtered_structures <- ptsi_pgk_pdb_information %>%
#    filter(experimental_method == "X-ray",
#           resolution_combined <= 3) %>%
#    group_by(reference_database_accession) %>%
#    filter(length == max(length)) %>%
#    ungroup()

## ----fetch_pdb_structure, eval = test_protti, warning = FALSE-----------------
#  # Input PDB IDs
#  pdb_ids <- unique(filtered_structures$pdb_ids) # "1ZMR", "2HWG"
#  
#  # Fetch atom level structural information
#  ptsi_pgk_structure_information <- fetch_pdb_structure(pdb_ids = pdb_ids,
#                                                        return_data_frame = TRUE)

## ----fetch_alphafold_prediction, eval = test_protti, warning = FALSE----------
#  # Fetch atom level structural prediction information from AlphaFold
#  ptsi_pgk_prediction_information <- fetch_alphafold_prediction(uniprot_ids = uniprot_ids,
#                                                                return_data_frame = TRUE)
#  
#  # Example for fetching all predictions for Methanocaldococcus jannaschii
#  # mj_predictions <- fetch_alphafold_prediction(organism_name = "Methanocaldococcus jannaschii")

## ----alphafold_domain_prediction, eval = test_protti, warning = FALSE---------
#  # Fetch aligned errors
#  aligned_error <- fetch_alphafold_aligned_error(uniprot_ids = uniprot_ids,
#                                                 error_cutoff = 4)
#  
#  # Predict protein domains with graph_resolution of 1
#  af_domains_res_1 <- predict_alphafold_domain(pae_list = aligned_error,
#                                         return_data_frame = TRUE,
#                                         graph_resolution = 1) # Default
#  
#  # Predict protein domains with graph_resolution of 0.5
#  af_domains_res_05 <- predict_alphafold_domain(pae_list = aligned_error,
#                                         return_data_frame = TRUE,
#                                         graph_resolution = 0.5)

## ----show_model_P08839_res1, eval = test_protti, echo=FALSE, warning=FALSE----
#  # This code won't be seen in the Vignette
#  if(!is.null(af_domains_res_1)){
#  # Fetch structure file for model
#  protti:::try_query("https://alphafold.ebi.ac.uk/files/AF-P08839-F1-model_v3.pdb",
#    type = "text/tab-separated-values",
#    col_names = FALSE,
#    quote = "",
#    show_col_types = FALSE,
#    progress = FALSE
#  ) %>%
#    readr::write_tsv(
#      file = paste0(tempdir(), "/AF-P08839-F1-model_v3.pdb"),
#      quote = "none",
#      escape = "none",
#      col_names = FALSE,
#      progress = FALSE
#    )
#  
#  # Load the r3dmol package
#  library(r3dmol)
#  
#  # Extract domain positions
#  
#  domain1 <- af_domains_res_1 %>%
#    filter(accession == "P08839" & domain == 1) %>%
#    pull(residue)
#  
#  domain2 <- af_domains_res_1 %>%
#    filter(accession == "P08839" & domain == 2) %>%
#    pull(residue)
#  
#  domain3 <- af_domains_res_1 %>%
#    filter(accession == "P08839" & domain == 3) %>%
#    pull(residue)
#  
#  # Create model
#  r3dmol() %>%
#    m_add_model(data = paste0(tempdir(), "/AF-P08839-F1-model_v3.pdb"), format = "pdb") %>%
#    m_set_style(style = m_style_cartoon()) %>%
#    m_add_style(
#      style = m_style_cartoon(color = "#8047d6"),
#      sel = m_sel(resi = domain1)
#    ) %>%
#    m_add_style(
#      style = m_style_cartoon(color = "#96d647"),
#      sel = m_sel(resi = domain2)
#    ) %>%
#    m_add_style(
#      style = m_style_cartoon(color = "#FF7276"),
#      sel = m_sel(resi = domain3)
#    ) %>%
#    m_zoom_to()
#  }

## ----show_model_P08839_res05, eval = test_protti, echo=FALSE, warning=FALSE----
#  # This code won't be seen in the Vignette
#  if(!is.null(af_domains_res_1)){
#  # Extract domain positions
#  
#  domain1 <- af_domains_res_05 %>%
#    filter(accession == "P08839" & domain == 1) %>%
#    pull(residue)
#  
#  domain2 <- af_domains_res_05 %>%
#    filter(accession == "P08839" & domain == 2) %>%
#    pull(residue)
#  
#  domain3 <- af_domains_res_05 %>%
#    filter(accession == "P08839" & domain == 3) %>%
#    pull(residue)
#  
#  # Create model
#  r3dmol() %>%
#    m_add_model(data = paste0(tempdir(), "/AF-P08839-F1-model_v3.pdb"), format = "pdb") %>%
#    m_set_style(style = m_style_cartoon()) %>%
#    m_add_style(
#      style = m_style_cartoon(color = "#8047d6"),
#      sel = m_sel(resi = domain1)
#    ) %>%
#    m_add_style(
#      style = m_style_cartoon(color = "#96d647"),
#      sel = m_sel(resi = domain2)
#    ) %>%
#    m_add_style(
#      style = m_style_cartoon(color = "#FF7276"),
#      sel = m_sel(resi = domain3)
#    ) %>%
#    m_zoom_to()
#  }

## ----find_peptide, eval = test_protti, warning = FALSE------------------------
#  ptsi_pgk_peptide_structure_positions <- find_peptide_in_structure(
#     peptide_data = ptsi_pgk_annotated,
#     peptide = pep_stripped_sequence,
#     start = start,
#     end = end,
#     uniprot_id = pg_protein_accessions,
#     pdb_data = filtered_structures,
#     retain_columns = c(eg_precursor_id, diff, adj_pval))

## ----create_structure_contact_map, eval = test_protti, warning = FALSE, fig.width = 10, fig.height = 7, fig.align = "center"----
#  # Filter data for significant peptides.
#  significant_peptides <- ptsi_pgk_peptide_structure_positions %>%
#    filter(abs(diff) > 2, adj_pval <= 0.01)
#  
#  # Create a structure contact maps
#  contact_map <- create_structure_contact_map(
#    data = significant_peptides,
#    id = pdb_ids,
#    chain = auth_asym_id,
#    auth_seq_id = auth_seq_id,
#    distance_cutoff = 10,
#    pdb_model_number_selection = c(0, 1),
#    return_min_residue_distance = TRUE
#  )
#  
#  # This is a helper function for the plot.
#  # It allows the display of integers on the axis.
#  integer_breaks <- function(n = 5, ...) {
#    fxn <- function(x) {
#      breaks <- floor(pretty(x, n, ...))
#      names(breaks) <- attr(breaks, "labels")
#      breaks
#    }
#    return(fxn)
#  }
#  
#  # Plot structure contact maps
#  # 1ZMR
#  contact_map[["1ZMR"]] %>% # Extract data frame from list
#    mutate(chain_combinations = paste0("chain_", label_asym_id_var1, "_vs_chain_", label_asym_id_var2)) %>%
#    ggplot(aes(x = label_seq_id_var1, y = label_seq_id_var2, fill = min_distance_residue)) +
#    geom_tile() +
#    scale_y_continuous(breaks = integer_breaks()) +
#    scale_x_continuous(breaks = integer_breaks()) +
#    facet_wrap(~chain_combinations, scale = "free") +
#    labs(title = "Structure contact map 1ZMR") +
#    theme_bw()
#  
#  # 2HWG
#  contact_map[["2HWG"]] %>% # Extract data frame from list
#    mutate(chain_combinations = paste0("chain_", label_asym_id_var1, "_vs_chain_", label_asym_id_var2)) %>%
#    ggplot(aes(x = label_seq_id_var1, y = label_seq_id_var2, fill = min_distance_residue)) +
#    geom_tile() +
#    scale_y_continuous(breaks = integer_breaks()) +
#    scale_x_continuous(breaks = integer_breaks()) +
#    labs(title = "Structure contact map 2HWG") +
#    facet_wrap(~chain_combinations, scale = "free") +
#    theme_bw()

## ----peptide_mapping, eval = test_protti, warning = FALSE---------------------
#  ptsi_pgk_peptide_structure_positions %>%
#    mutate(map_value = ifelse(eg_precursor_id %in% significant_peptides$eg_precursor_id,
#                              100,
#                              0)) %>%
#    map_peptides_on_structure(
#     uniprot_id = pg_protein_accessions,
#     pdb_id = pdb_ids,
#     chain = auth_asym_id,
#     auth_seq_id = auth_seq_id,
#     map_value = map_value,
#     file_format = ".pdb",
#     export_location = tempdir() # change to a location of your choice
#   )

## ----3d_structure_mapping, eval = test_protti, echo=TRUE, warning=FALSE-------
#  # Install the r3dmol package if it is not installed
#  # install.packages("r3dmol")
#  
#  # Load the r3dmol package
#  library(r3dmol)
#  
#  # Create structure
#  r3dmol() %>%
#    m_add_model(data = paste0(tempdir(), "/1ZMR_P0A799.pdb"), format = "pdb") %>%
#    m_set_style(style = m_style_cartoon(
#      colorfunc = "
#          function(atom) {
#            if (atom.b == 50) {return '#90EE90'};
#            if (atom.b == 100) {return '#FF7276'};
#            if (atom.b == 0) {return 'white'};
#            return 'white';
#          }"
#    )) %>%
#    m_zoom_to()

## ----peptide_map_2hwg, eval = test_protti, echo = FALSE, fig.align = "center", fig.cap = "**ptsI (2HWG) with mapped significantly changing peptides.**"----
#  knitr::include_graphics("figures/peptide_map_2hwg.png")

## ----interaction_2hwg, eval = test_protti, echo = FALSE, fig.align = "center", out.width = "60%", fig.cap = "**ptsI (2HWG) binding interface of significantly chaging peptide.**"----
#  knitr::include_graphics("figures/interaction_2hwg.png")

## ----peptide_map_1zmr, eval = test_protti, echo = FALSE, fig.align = "center", fig.cap = "**pgk (1ZMR) with mapped significantly changing peptides.**"----
#  knitr::include_graphics("figures/peptide_map_1zmr.png")

## ----calculate_and_map_scores, eval = test_protti-----------------------------
#  # Calculate the amino acid score
#  amino_acid_score <- calculate_aa_scores(
#    data = ptsi_pgk_peptide_structure_positions,
#    protein = pg_protein_accessions,
#    diff = diff,
#    adj_pval = adj_pval,
#    start_position = start,
#    end_position = end,
#    retain_columns = c(pdb_ids, auth_asym_id)
#  )
#  
#  # Find amino acid positions in the structure
#  ptsi_pgk_amino_acid_structure_positions <- find_peptide_in_structure(
#     peptide_data = amino_acid_score,
#     peptide = residue,
#     start = residue,
#     end = residue,
#     uniprot_id = pg_protein_accessions,
#     pdb_data = filtered_structures,
#     retain_columns = c(amino_acid_score))
#  
#  # Map the score on structure
#  map_peptides_on_structure(
#    peptide_data = ptsi_pgk_amino_acid_structure_positions,
#     uniprot_id = pg_protein_accessions,
#     pdb_id = pdb_ids,
#     chain = auth_asym_id,
#     auth_seq_id = auth_seq_id,
#     map_value = amino_acid_score,
#     file_format = ".pdb",
#     export_location = tempdir()
#   )

## ----score_3d_structure_mapping, eval = test_protti, echo=TRUE, warning=FALSE----
#  # create a color gradient with 101 colors
#  color_gradient <- paste0('"',
#                           paste(colorRampPalette(c("white", "#90EE90", "#FF7276"))(101),
#                                 collapse = '", "'),
#                           '"')
#  
#  # create structure
#  r3dmol() %>%
#    m_add_model(data = paste0(tempdir(), "/2HWG_P08839.pdb"), format = "pdb") %>%
#    m_set_style(style = m_style_cartoon(
#      colorfunc = paste0("
#          function(atom) {
#            const color = [", color_gradient,"]
#            return color[Math.round(atom.b)]
#          }")
#    )) %>%
#    m_zoom_to()

## ----peptide_map_2hwg_score, eval = test_protti, echo = FALSE, fig.align = "center", fig.cap = "**ptsI (2HWG) with mapped amino acid scores.**"----
#  knitr::include_graphics("figures/peptide_map_2hwg_score.png")

## ----peptide_map_1zmr_score, eval = test_protti, echo = FALSE, fig.align = "center", fig.cap = "**pgk (1ZMR) with mapped amino acid scores.**"----
#  knitr::include_graphics("figures/peptide_map_1zmr_score.png")

Try the protti package in your browser

Any scripts or data that you put into this service are public.

protti documentation built on Jan. 22, 2023, 1:11 a.m.