inst/doc/data_analysis_single_dose_treatment_workflow.R

## ----include = FALSE----------------------------------------------------------
test_protti <- identical(Sys.getenv("TEST_PROTTI"), "true")
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup, eval = test_protti, message = FALSE, warning = FALSE--------------
# library(protti)
# library(magrittr)
# library(dplyr)

## ----eval=FALSE---------------------------------------------------------------
# # To read in your own data you can use read_protti()
# your_data <- read_protti(filename = "mydata/data.csv")

## ----load_data, eval = test_protti--------------------------------------------
# data("rapamycin_10uM")

## ----median_normalisation_filtering, eval = test_protti, message = FALSE, warning = FALSE----
# data_normalised <- rapamycin_10uM %>%
#   filter(eg_is_decoy == FALSE) %>%
#   mutate(intensity_log2 = log2(fg_quantity)) %>%
#   normalise(
#     sample = r_file_name,
#     intensity_log2 = intensity_log2,
#     method = "median"
#   )
# 
# data_filtered <- data_normalised %>%
#   filter_cv(
#     grouping = eg_precursor_id,
#     condition = r_condition,
#     log2_intensity = intensity_log2,
#     cv_limit = 0.25,
#     min_conditions = 1
#   )

## ----data_preparation_prot_pep, eval = test_protti, message = FALSE, warning = FALSE----
# data_filtered_proteotypic <- data_filtered %>%
#   filter(pep_is_proteotypic == TRUE)

## ----fetching_database_information, eval = test_protti, message = FALSE, warning = FALSE----
# uniprot_ids <- unique(data_filtered_proteotypic$pg_protein_accessions)
# 
# uniprot <-
#   fetch_uniprot(
#     uniprot_ids = uniprot_ids,
#     columns = c(
#       "protein_name",
#       "gene_names",
#       "go_f",
#       "xref_string",
#       "cc_interaction",
#       "ft_act_site",
#       "ft_binding",
#       "xref_pdb",
#       "length",
#       "sequence"
#     )
#   ) %>%
#   rename(pg_protein_accessions = accession)
# 
# data_filtered_uniprot <- data_filtered_proteotypic %>%
#   left_join(
#     y = uniprot,
#     by = "pg_protein_accessions"
#   ) %>%
#   find_peptide(
#     protein_sequence = sequence,
#     peptide_sequence = pep_stripped_sequence
#   ) %>%
#   assign_peptide_type(
#     aa_before = aa_before,
#     last_aa = last_aa,
#     aa_after = aa_after,
#     protein_id = pg_protein_accessions
#   ) %>%
#   calculate_sequence_coverage(
#     protein_sequence = sequence,
#     peptides = pep_stripped_sequence
#   )

## ----coverage_plot, eval = test_protti, fig.align= "center", fig.width = 6, fig.height = 5----
# qc_sequence_coverage(
#   data = data_filtered_uniprot,
#   protein_identifier = pg_protein_accessions,
#   coverage = coverage
# )

## ----t_test, eval = test_protti, message = FALSE, warning = FALSE-------------
# diff_abundance_data <- data_filtered_uniprot %>%
#   assign_missingness(
#     sample = r_file_name,
#     condition = r_condition,
#     grouping = eg_precursor_id,
#     intensity = normalised_intensity_log2,
#     ref_condition = "control",
#     completeness_MAR = 0.7,
#     completeness_MNAR = 0.25,
#     retain_columns = c(
#       pg_protein_accessions,
#       go_f,
#       xref_string,
#       start,
#       end,
#       length,
#       coverage
#     )
#   ) %>%
#   calculate_diff_abundance(
#     sample = r_file_name,
#     condition = r_condition,
#     grouping = eg_precursor_id,
#     intensity_log2 = normalised_intensity_log2,
#     missingness = missingness,
#     comparison = comparison,
#     method = "moderated_t-test",
#     retain_columns = c(
#       pg_protein_accessions,
#       go_f,
#       xref_string,
#       start,
#       end,
#       length,
#       coverage
#     )
#   )

## ----pval_distribution, eval = test_protti, message = FALSE, warning = FALSE, fig.align= "center", fig.width = 6, fig.height = 5----
# pval_distribution_plot(
#   data = diff_abundance_data,
#   grouping = eg_precursor_id,
#   pval = pval
# )

## ----volcano_plot, eval = test_protti, fig.align= "center", fig.width = 6, fig.height = 5, message = FALSE, warning = FALSE----
# volcano_plot(
#   data = diff_abundance_data,
#   grouping = eg_precursor_id,
#   log2FC = diff,
#   significance = pval,
#   method = "target",
#   target_column = pg_protein_accessions,
#   target = "P62942",
#   x_axis_label = "log2(fold change) Rapamycin treated vs. untreated",
#   significance_cutoff = c(0.05, "adj_pval")
# )
# 
# # The significance_cutoff argument can also just be used for a
# # regular cutoff line by just providing the cutoff value, e.g.
# # signficiance_cutoff = 0.05

## ----barcode_plot, eval = test_protti, fig.align = "center", fig.width = 6, message = FALSE, warning = FALSE----
# FKBP12 <- diff_abundance_data %>%
#   filter(pg_protein_accessions == "P62942")
# 
# barcode_plot(
#   data = FKBP12,
#   start_position = start,
#   end_position = end,
#   protein_length = length,
#   coverage = coverage,
#   colouring = diff,
#   cutoffs = c(diff = 1, adj_pval = 0.05),
#   protein_id = pg_protein_accessions
# )

## ----woods_plot, eval = test_protti, fig.align = "center", fig.width = 6, message = FALSE, warning = FALSE----
# FKBP12 <- FKBP12 %>%
#   mutate(significant = ifelse(adj_pval < 0.01, TRUE, FALSE))
# 
# woods_plot(
#   data = FKBP12,
#   fold_change = diff,
#   start_position = start,
#   end_position = end,
#   protein_length = length,
#   coverage = coverage,
#   colouring = adj_pval,
#   protein_id = pg_protein_accessions,
#   facet = FALSE,
#   fold_change_cutoff = 1,
#   highlight = significant
# )

## ----protile_plot, eval = test_protti, fig.align = "center", fig.width = 20, fig.height = 6, message = FALSE, warning = FALSE----
# FKBP12_intensity <- data_filtered_uniprot %>%
#   filter(pg_protein_accessions == "P62942")
# 
# peptide_profile_plot(
#   data = FKBP12_intensity,
#   sample = r_file_name,
#   peptide = eg_precursor_id,
#   intensity_log2 = normalised_intensity_log2,
#   grouping = pg_protein_accessions,
#   targets = "P62942",
#   protein_abundance_plot = FALSE
# )

## ----additional_functions, eval=FALSE-----------------------------------------
# diff_abundance_significant <- diff_abundance_data %>%
#   # mark significant peptides
#   mutate(is_significant = ifelse((adj_pval < 0.01 & abs(diff) > 1), TRUE, FALSE)) %>%
#   # mark true positive hits
#   mutate(binds_treatment = pg_protein_accessions == "P62942")
# 
# ### GO enrichment using "molecular function" annotation from UniProt
# 
# calculate_go_enrichment(
#   data = diff_abundance_significant,
#   protein_id = pg_protein_accessions,
#   is_significant = is_significant,
#   go_annotations_uniprot = go_f
# )
# 
# ### Network analysis
# 
# network_input <- diff_abundance_significant %>%
#   filter(is_significant == TRUE)
# 
# analyse_functional_network(
#   data = network_input,
#   protein_id = pg_protein_accessions,
#   string_id = xref_string,
#   binds_treatment = binds_treatment,
#   organism_id = 9606
# )
# 
# ### KEGG pathway enrichment
# 
# # First you need to load KEGG pathway annotations from the KEGG database
# # for your specific organism of interest. In this case HeLa cells were
# # used, therefore the organism of interest is homo sapiens (hsa)
# 
# kegg <- fetch_kegg(species = "hsa")
# 
# # Next we need to annotate our data with KEGG pathway IDs and perform enrichment analysis
# 
# diff_abundance_significant %>%
#   # columns containing proteins IDs are named differently
#   left_join(kegg, by = c("pg_protein_accessions" = "uniprot_id")) %>%
#   calculate_kegg_enrichment(
#     protein_id = pg_protein_accessions,
#     is_significant = is_significant,
#     pathway_id = pathway_id,
#     pathway_name = pathway_name
#   )
# 
# ### Treatment enrichment analysis
# 
# calculate_treatment_enrichment(diff_abundance_significant,
#   protein_id = pg_protein_accessions,
#   is_significant = is_significant,
#   binds_treatment = binds_treatment,
#   treatment_name = "Rapamycin"
# )

Try the protti package in your browser

Any scripts or data that you put into this service are public.

protti documentation built on Jan. 14, 2026, 9:08 a.m.