knitr::opts_chunk$set(echo = TRUE,message=FALSE, warning=FALSE)

Preliminaries

library(dplyr)
library(knitr)

library(mzR)
library(MSnbase)
remote_dir=file.path("https://raw.githubusercontent.com/PacktPublishing",
                      "R-Bioinformatics-Cookbook/master/datasets/ch6")

Recipe: Visualizing distributions of peptide hit counts to find thresholds

1. Load the libraries and data:

library(MSnID)
library(data.table)
library(dplyr)
library(ggplot2)

msnid_file <- "HeLa_180123_m43_r2_CAM.mzid.gz"
if (!file.exists(msnid_file)) {
  download.file(file.path(remote_dir,msnid_file), msnid_file)
}
msnid <- MSnID()
msnid <- read_mzIDs(msnid, msnid_file)
peptide_info <- as(msnid, "data.table")

2. Filter out decoy data rows and get a count of every time a peptide appears:

  per_peptide_counts <- peptide_info %>%
  filter(isDecoy == FALSE) %>%
  group_by(pepSeq) %>%
  summarise(count = n() ) %>%
  mutate(sample = rep("peptide_counts", length(counts) ) )
## 3. Create a violin and jitter plot of the hit counts:
  g <- per_peptide_counts %>% ggplot() + aes( sample, count) + geom_jitter() + geom_violin() +
  scale_y_log10()

Peptide Counts

print(g)

4. Create a plot of cumulative hit counts for peptides sorted by hit count:

  g2 <- per_peptide_counts %>% arrange(count) %>%
   mutate(cumulative_hits = cumsum(count), peptide = 1:length(count)) %>%
   ggplot() + aes(peptide, cumulative_hits) + geom_line()

Create a plot of cumulative hit counts for peptides sorted by hit count

print(g2)

5. Filter out very low and very high peptide hits and then replot them:

  filtered_per_peptide_counts <- per_peptide_counts %>% filter(count >= 5, count <= 2500)
  g3 <- filtered_per_peptide_counts %>% ggplot() + aes( sample, count) +
    geom_jitter() + geom_violin() + scale_y_log10()

Visualization of cumulative hit counts for peptides sorted by hit count

print(g3) 

Recipe: Converting MS formats to move data between tools

1 :Load the library and import the source data file:

  library(mzR) 
mzxml_file <- "threonine_i2_e35_pH_tree.mzXML" 
if (!file.exists(mzxml_file)) {
  download.file(file.path(remote_dir, mzxml_file), mzxml_file)
}
mzdata <- openMSfile(mzxml_file)  # file "threonine_i2_e35_pH_tree.mzXML"

2. Extract the header and peak data:

header_info <- header(mzdata) 
peak_data_list <- spectra(mzdata)

3. Write the data into a new format file:

writeMSData(peak_data_list, file.path(remote_dir, "out.mz"),
            header = header_info, outformat = "mzml", rtime_seconds = TRUE)

Recipe: Matching spectra to peptides for verification with protViz

1. Load in the libraries and the MS data:

  library(mzR)
  library(protViz) 
  mzml_file <- "MM8.mzML"  # was downloaded earlier
  ms <- openMSfile(mzml_file) 

2. Extract the peaks and retention time from the spectrum:

  p <- peaks(ms,2) 
  spec <- list(mZ = p[,1], intensity = p[,2]) 

Visualizing Spectrum

df <- data.frame(cbind(mZ=spec$mZ, intensity=spec$intensity))
df %>% ggplot(aes(x=mZ, y=intensity)) + geom_line()

3. Create a plot of theoretical versus observed ion masses:

m <- psm("PEPTIDESEQ", spec, plot=TRUE)


eckartbindewald/bfxapps2 documentation built on Feb. 6, 2025, 3:22 a.m.