inst/doc/data_analysis_dose_response_workflow.R

## ----include = FALSE----------------------------------------------------------
build_vignette_on_cran <- identical(Sys.getenv("BUILD_VIGNETTE"), "true")
test_protti <- identical(Sys.getenv("TEST_PROTTI"), "true") & build_vignette_on_cran
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----CRAN_comment, message=FALSE, warning=FALSE, echo=FALSE-------------------
if (build_vignette_on_cran == FALSE) {
  print("!!! IMPORTANT !!!")
  print("This Vignette has not been built completely on CRAN due to size limitations.")
  print("Please check the correct version here: ")
  print("https://jpquast.github.io/protti/articles/https://jpquast.github.io/protti/articles/data_analysis_dose_response_workflow.html")
}

## ----load_packages, eval = test_protti, warning = FALSE, message = FALSE------
# # Load packages
# library(protti)
# library(dplyr)
# library(magrittr)
# library(ggplot2)

## ----use_read_protti, eval=FALSE----------------------------------------------
# # Load data
# rapamycin_dose_response <- read_protti("your_data.csv")

## ----load_data, eval = test_protti, warning = FALSE---------------------------
# utils::data("rapamycin_dose_response")

## ----filter_transform_normalise, eval = test_protti---------------------------
# # Filter, log2 transform and normalise data
# data_normalised <- rapamycin_dose_response %>%
#   filter(eg_is_decoy == FALSE) %>%
#   mutate(intensity_log2 = log2(fg_quantity)) %>%
#   normalise(
#     sample = r_file_name,
#     intensity_log2 = intensity_log2,
#     method = "median"
#   ) %>%
#   filter(pep_is_proteotypic == TRUE)
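
## ----check_normalisation, eval = FALSE----------------------------------------
# # Optional sanity check (a minimal sketch using only dplyr verbs): after
# # median normalisation the per-sample median intensities should be aligned.
# data_normalised %>%
#   group_by(r_file_name) %>%
#   summarise(median_intensity = median(normalised_intensity_log2, na.rm = TRUE))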

## ----intensity_distribution, eval = test_protti, fig.align = "center", fig.width = 7, fig.height = 5----
# qc_intensity_distribution(
#   data = data_normalised,
#   grouping = eg_precursor_id,
#   intensity = normalised_intensity_log2,
#   plot_style = "histogram"
# )

## ----intensity_filtering, eval = test_protti----------------------------------
# data_normalised <- data_normalised %>%
#   filter(normalised_intensity_log2 > 5)
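
## ----count_precursors, eval = FALSE-------------------------------------------
# # Optional sketch: check how many precursors and proteins remain after the
# # intensity cutoff.
# data_normalised %>%
#   summarise(
#     n_precursors = n_distinct(eg_precursor_id),
#     n_proteins = n_distinct(pg_protein_accessions)
#   )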

## ----sample_correlation, eval = test_protti, fig.align = "center", fig.width = 7, fig.height = 5----
# qc_sample_correlation(
#   data = data_normalised,
#   sample = r_file_name,
#   grouping = eg_precursor_id,
#   intensity_log2 = normalised_intensity_log2,
#   condition = r_condition
# )

## ----sample_pca, eval = test_protti, fig.align = "center", fig.width = 7, fig.height = 5, warning = FALSE, message = FALSE----
# qc_pca(
#   data = data_normalised,
#   sample = r_file_name,
#   grouping = eg_precursor_id,
#   intensity = normalised_intensity_log2,
#   condition = r_condition
# )

## ----model_fit, eval = test_protti--------------------------------------------
# fit <- data_normalised %>%
#   fit_drc_4p(
#     sample = r_file_name,
#     grouping = eg_precursor_id,
#     response = normalised_intensity_log2,
#     dose = r_condition,
#     filter = "post",
#     n_replicate_completeness = 2, # specify this based on your data and cutoff criteria
#     n_condition_completeness = 4, # specify this based on your data and cutoff criteria
#     retain_columns = c(pg_protein_accessions)
#   )
# 
# # Make sure to retain columns that you need later but that are not used by the function.
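
## ----inspect_fit, eval = FALSE------------------------------------------------
# # Optional sketch: precursors that pass the completeness cutoffs receive a
# # rank (the annotation step further below relies on this). Count them here.
# fit %>%
#   filter(!is.na(rank)) %>%
#   nrow()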

## ----parallel_model_fit, eval = FALSE-----------------------------------------
# # Set up parallel workers. Make sure you have the future package installed.
# future::plan(future::multisession, workers = 3)
# 
# # fit models in parallel
# parallel_fit <- data_normalised %>%
#   parallel_fit_drc_4p(
#     sample = r_file_name,
#     grouping = eg_precursor_id,
#     response = normalised_intensity_log2,
#     dose = r_condition,
#     retain_columns = c(pg_protein_accessions),
#     n_cores = 3,
#     n_replicate_completeness = 2, # specify this based on your data and cutoff criteria
#     n_condition_completeness = 4 # specify this based on your data and cutoff criteria
#   )
# 
# # Remove the workers again after you are done
# future::plan(future::sequential)

## ----result_analysis, eval = test_protti, echo=FALSE, results='asis'----------
# fit %>%
#   filter(rank <= 20) %>%
#   select(
#     rank,
#     score,
#     eg_precursor_id,
#     pg_protein_accessions,
#     anova_adj_pval,
#     correlation,
#     ec_50
#   ) %>%
#   mutate(
#     anova_adj_pval = format(anova_adj_pval, digits = 3),
#     correlation = format(correlation, digits = 3),
#     ec_50 = format(ec_50, digits = 2),
#     score = format(score, digits = 3)
#   ) %>%
#   knitr::kable(caption = "All hits")
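
## ----export_results, eval = FALSE---------------------------------------------
# # Optional sketch: export the ranked hits for use outside of R. The file
# # name is just an example.
# fit %>%
#   filter(!is.na(rank)) %>%
#   select(
#     rank,
#     score,
#     eg_precursor_id,
#     pg_protein_accessions,
#     anova_adj_pval,
#     correlation,
#     ec_50
#   ) %>%
#   utils::write.csv("rapamycin_dose_response_hits.csv", row.names = FALSE)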

## ----model_plot, eval = test_protti, fig.align = "center", fig.width = 7, fig.height = 5, message = FALSE, warning = FALSE----
# # Model plotting
# drc_4p_plot(fit,
#   grouping = eg_precursor_id,
#   dose = r_condition,
#   response = normalised_intensity_log2,
#   targets = "_VFDVELLKLE_.2",
#   unit = "pM",
#   x_axis_limits = c(10, 1e+08),
#   export = FALSE
# )

## ----uniprot_annotation, eval = test_protti-----------------------------------
# # fetching of UniProt information
# unis <- unique(fit$pg_protein_accessions)
# uniprot <- fetch_uniprot(unis)
# 
# # annotation of fit data based on information from UniProt
# fit_annotated <- fit %>%
#   # columns containing protein IDs are named differently
#   left_join(uniprot, by = c("pg_protein_accessions" = "accession")) %>%
#   # mark peptides that pass the filtering
#   mutate(passed_filter = !is.na(rank)) %>%
#   # create new column with prior knowledge about binding partners of treatment
#   mutate(binds_treatment = pg_protein_accessions == "P62942")
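
## ----check_annotation, eval = FALSE-------------------------------------------
# # Optional sketch: cross-tabulate the filtering status against the prior
# # knowledge column to see how many known rapamycin binders are among the hits.
# fit_annotated %>%
#   distinct(pg_protein_accessions, passed_filter, binds_treatment) %>%
#   count(passed_filter, binds_treatment)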

## ----post_analysis, eval = FALSE----------------------------------------------
# ### GO enrichment using "molecular function" annotation from UniProt
# 
# calculate_go_enrichment(fit_annotated,
#   protein_id = pg_protein_accessions,
#   is_significant = passed_filter,
#   go_annotations_uniprot = go_f # column obtained from UniProt
# )
# 
# ### KEGG pathway enrichment
# 
# # First you need to load KEGG pathway annotations from the KEGG database
# # for your specific organism of interest. In this case HeLa cells were
# # used, therefore the organism of interest is Homo sapiens (hsa).
# 
# kegg <- fetch_kegg(species = "hsa")
# 
# # Next we need to annotate our data with KEGG pathway IDs and perform enrichment analysis
# 
# fit %>%
#   # columns containing protein IDs are named differently
#   left_join(kegg, by = c("pg_protein_accessions" = "uniprot_id")) %>%
#   calculate_kegg_enrichment(
#     protein_id = pg_protein_accessions,
#     is_significant = passed_filter,
#     pathway_id = pathway_id, # column name from kegg data frame
#     pathway_name = pathway_name # column name from kegg data frame
#   )
# 
# ### Treatment enrichment analysis
# 
# calculate_treatment_enrichment(fit_annotated,
#   protein_id = pg_protein_accessions,
#   is_significant = passed_filter,
#   binds_treatment = binds_treatment,
#   treatment_name = "Rapamycin"
# )
# 
# ### Network analysis
# 
# fit_annotated %>%
#   filter(passed_filter == TRUE) %>% # only analyse hits that were significant
#   analyse_functional_network(
#     protein_id = pg_protein_accessions,
#     string_id = xref_string, # column from UniProt containing STRING IDs
#     organism_id = 9606,
#     # tax ID can be found in function documentation or STRING database
#     binds_treatment = binds_treatment,
#     plot = TRUE
#   )
