inst/doc/data_analysis_dose_response_workflow.R

## ----include = FALSE----------------------------------------------------------
build_vignette_on_cran <- identical(Sys.getenv("BUILD_VIGNETTE"), "true")
test_protti <- identical(Sys.getenv("TEST_PROTTI"), "true") & build_vignette_on_cran
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----CRAN_comment, message=FALSE, warning=FALSE, echo=FALSE-------------------
if (build_vignette_on_cran == FALSE) {
  print("!!! IMPORTANT !!!")
  print("This Vignette has not been built completely on CRAN due to size limitations.")
  print("Please check the correct version here: ")
  print("https://jpquast.github.io/protti/articles/https://jpquast.github.io/protti/articles/data_analysis_dose_response_workflow.html")
}

## ----load_packages, eval = test_protti, warning = FALSE, message = FALSE------
#  # Load packages
#  library(protti)
#  library(dplyr)
#  library(magrittr)
#  library(ggplot2)

## ----use_read_protti, eval=FALSE----------------------------------------------
#  # Load data
#  rapamycin_dose_response <- read_protti("your_data.csv")

## ----load_data, eval = test_protti, warning = FALSE---------------------------
#  utils::data("rapamycin_dose_response")

## ----filter_transform_normalise, eval = test_protti---------------------------
#  # Filter, log2 transform and normalise data
#  data_normalised <- rapamycin_dose_response %>%
#    filter(eg_is_decoy == FALSE) %>%
#    mutate(intensity_log2 = log2(fg_quantity)) %>%
#    normalise(
#      sample = r_file_name,
#      intensity_log2 = intensity_log2,
#      method = "median"
#    ) %>%
#    filter(pep_is_proteotypic == TRUE)

## ----intensity_distribution, eval = test_protti, fig.align = "center", fig.width = 7, fig.height = 5----
#  qc_intensity_distribution(
#    data = data_normalised,
#    grouping = eg_precursor_id,
#    intensity = normalised_intensity_log2,
#    plot_style = "histogram"
#  )

## ----intensity_filtering, eval = test_protti----------------------------------
#  data_normalised <- data_normalised %>%
#    filter(normalised_intensity_log2 > 5)

## ----sample_correlation, eval = test_protti, fig.align = "center", fig.width = 7, fig.height = 5----
#  qc_sample_correlation(
#    data = data_normalised,
#    sample = r_file_name,
#    grouping = eg_precursor_id,
#    intensity_log2 = normalised_intensity_log2,
#    condition = r_condition
#  )

## ----sample_pca, eval = test_protti, fig.align = "center", fig.width = 7, fig.height = 5, warning = FALSE, message = FALSE----
#  qc_pca(
#    data = data_normalised,
#    sample = r_file_name,
#    grouping = eg_precursor_id,
#    intensity = normalised_intensity_log2,
#    condition = r_condition
#  )

## ----model_fit, eval = test_protti--------------------------------------------
#  fit <- data_normalised %>%
#    fit_drc_4p(
#      sample = r_file_name,
#      grouping = eg_precursor_id,
#      response = normalised_intensity_log2,
#      dose = r_condition,
#      filter = "post",
#      n_replicate_completeness = 2, # specifiy this based on your data and cutoff criteria
#      n_condition_completeness = 4, # specifiy this based on your data and cutoff criteria
#      retain_columns = c(pg_protein_accessions)
#    )
#  
#  # make sure to retain columns that you need later but that are not part of the function

## ----parallel_model_fit, eval = FALSE-----------------------------------------
#  # setup of cores. Make sure you have the future package installed
#  future::plan(future::multisession, workers = 3)
#  
#  # fit models in parallel
#  parallel_fit <- data_normalised %>%
#    parallel_fit_drc_4p(
#      sample = r_file_name,
#      grouping = eg_precursor_id,
#      response = normalised_intensity_log2,
#      dose = r_condition,
#      retain_columns = c(pg_protein_accessions),
#      n_cores = 3,
#      n_replicate_completeness = 2, # specifiy this based on your data and cutoff criteria
#      n_condition_completeness = 4, # specifiy this based on your data and cutoff criteria
#    )
#  
#  # remove workers again after you are done
#  future::plan(future::sequential)

## ----result_analysis, eval = test_protti, echo=FALSE, results='asis'----------
#  fit %>%
#    filter(rank <= 20) %>%
#    select(
#      rank,
#      score,
#      eg_precursor_id,
#      pg_protein_accessions,
#      anova_adj_pval,
#      correlation,
#      ec_50
#    ) %>%
#    mutate(
#      anova_adj_pval = format(anova_adj_pval, digits = 3),
#      correlation = format(correlation, digits = 3),
#      ec_50 = format(ec_50, digits = 2),
#      score = format(score, digits = 3)
#    ) %>%
#    knitr::kable(caption = "All hits")

## ----model_plot, eval = test_protti, fig.align = "center", fig.width = 7, fig.height = 5, message = FALSE, warning = FALSE----
#  # Model plotting
#  drc_4p_plot(fit,
#    grouping = eg_precursor_id,
#    dose = r_condition,
#    response = normalised_intensity_log2,
#    targets = "_VFDVELLKLE_.2",
#    unit = "pM",
#    x_axis_limits = c(10, 1e+08),
#    export = FALSE
#  )

## ----uniprot_annotation, eval = test_protti-----------------------------------
#  # fetching of UniProt information
#  unis <- unique(fit$pg_protein_accessions)
#  uniprot <- fetch_uniprot(unis)
#  
#  # annotation of fit data based on information from UniProt
#  fit_annotated <- fit %>%
#    # columns containing proteins IDs are named differently
#    left_join(uniprot, by = c("pg_protein_accessions" = "accession")) %>%
#    # mark peptides that pass the filtering
#    mutate(passed_filter = !is.na(rank)) %>%
#    # create new column with prior knowledge about binding partners of treatment
#    mutate(binds_treatment = pg_protein_accessions == "P62942")

## ----post_analysis, eval = FALSE----------------------------------------------
#  ### GO enrichment using "molecular function" annotation from UniProt
#  
#  calculate_go_enrichment(fit_annotated,
#    protein_id = pg_protein_accessions,
#    is_significant = passed_filter,
#    go_annotations_uniprot = go_f
#  ) # column obtained from UniProt
#  
#  ### KEGG pathway enrichment
#  
#  # First you need to load KEGG pathway annotations from the KEGG database
#  # for your specific organism of interest. In this case HeLa cells were
#  # used, therefore the organism of interest is homo sapiens (hsa)
#  
#  kegg <- fetch_kegg(species = "hsa")
#  
#  # Next we need to annotate our data with KEGG pathway IDs and perform enrichment analysis
#  
#  fit %>%
#    # columns containing proteins IDs are named differently
#    left_join(kegg, by = c("pg_protein_accessions" = "uniprot_id")) %>%
#    calculate_kegg_enrichment(
#      protein_id = pg_protein_accessions,
#      is_significant = passed_filter,
#      pathway_id = pathway_id, # column name from kegg data frame
#      pathway_name = pathway_name
#    ) # column name from kegg data frame
#  
#  ### Treatment enrichment analysis
#  
#  calculate_treatment_enrichment(fit_annotated,
#    protein_id = pg_protein_accessions,
#    is_significant = passed_filter,
#    binds_treatment = binds_treatment,
#    treatment_name = "Rapamycin"
#  )
#  
#  ### Network analysis
#  
#  fit_annotated %>%
#    filter(passed_filter == TRUE) %>% # only analyse hits that were significant
#    analyse_functional_network(
#      protein_id = pg_protein_accessions,
#      string_id = xref_string, # column from UniProt containing STRING IDs
#      organism_id = 9606,
#      # tax ID can be found in function documentation or STRING database
#      binds_treatment = binds_treatment,
#      plot = TRUE
#    )

Try the protti package in your browser

Any scripts or data that you put into this service are public.

protti documentation built on Sept. 11, 2024, 7:53 p.m.