In MassDynamics/lfq_processing: Processes Proteomics Files for Quantification.

knitr::opts_chunk$set(fig.cap = NULL, fig.path = params$output_figure)

library(data.table)
library(ggplot2)
library(ggrepel)
library(GGally)
library(umap)
library(FactoMineR)
library(factoextra)
library(corrplot)
library(viridis)
library(ggpubr)
library(Hmisc)
library(plotly)
library(stringr)
library(bit64)

num_files <- expdes[, .N]
run_per_condition <- expdes[, .(countRepMax = .N), by = .(experiment)]
setnames(run_per_condition, "experiment", "condition")

# for fractions, create file name from mqExperiment and Fraction
if (!("file_name" %in% colnames(expdes))){
  expdes$file_name = paste(expdes$experiment, " - ", expdes$Replicate)
}


if (!protein_only){
  mod_pept_int_rep <- merge(
    run_per_condition,
    mod_pept_int[Imputed == 0, .(Repcount = .N), by = .(id, condition)],
    by = c("condition")
  )
  mod_pept_int_rep[, repPC := Repcount/countRepMax]
  mod_pept_id_in_a_cond <- mod_pept_int_rep[repPC >= 0.5, unique(id)]
  mod_pept_int[, Valid := 0L]
  mod_pept_int[id %in% mod_pept_id_in_a_cond, Valid := 1L]
  rm(mod_pept_id_in_a_cond, mod_pept_int_rep)

  pept_int_rep <- merge(
    run_per_condition,
    pept_int[Imputed == 0, .(Repcount = .N), by = .(id, condition)],
    by = c("condition")
  )
  pept_int_rep[, repPC := Repcount/countRepMax]
  pept_id_in_a_cond <- pept_int_rep[repPC >= 0.5, unique(id)]
  pept_int[, Valid := 0L]
  pept_int[id %in% pept_id_in_a_cond, Valid := 1L]
  rm(pept_id_in_a_cond, pept_int_rep)

}

prot_int_rep <- merge(
  run_per_condition,
  prot_int[Imputed == 0, .(Repcount = .N), by = .(id, condition)],
  by = c("condition")
)
prot_int_rep[, repPC := Repcount/countRepMax]
prot_id_in_a_cond <- prot_int_rep[repPC >= 0.5, unique(id)]
prot_int[, Valid := 0L]
prot_int[id %in% prot_id_in_a_cond, Valid := 1L]
rm(prot_id_in_a_cond, prot_int_rep)

More about the QC Report

This QC Report is designed to help Scientists quickly assess the several aspect of experiment quality, There are 4 categories in the QC report:

Experiment Health
Feature Completeness
Sample Preparation
Identifications

\newpage

1. Experiment Health

A. Principal Component Analysis (PCA) and Dimensionality Reduction {.tabset .tabset-fade .tabset-pills}

Experiment Health Details

PCA is an unsupervised machine learning algorithm that clusters your data, showing us which replicates were more or less similar. PCA creates new dimensions that collapse the overall variance in your data into fewer dimensions.

For a healthy experiment we expect:

Technical Replicates should be clustered tightly together.
Biological Replicates should cluster more than non replicates.
Different experimental conditions may vary systematically in the first few dimensions.

If replicates don’t cluster together, it is possible that the variance between the replicates is greater than the variance between the experimental conditions. It is also possible that the difference between the experimental conditions is encoded in other dimensions not represented here.

A scree plot shows the amount of variance explained by each dimension extracted by PCA. A high degree of variance in the first few dimensions may suggest large differences between your samples.

Proteins

# Prepare PCA data table
# limiting the data by number of missing values as per input parameters:
# limitImputedRatio
# checkBy
checkBy = "Experiment"
if (checkBy == "Experiment") {
  pca_dt <- prot_int[Valid == 1, .(
    id,
    condition,
    Replicate,
    log2NInt,
    run_id
  )]
  pca_dt <- dcast(pca_dt, condition + Replicate ~ id, value.var = "log2NInt")

} else {
  # pca_dt <- prot_int[Valid == 1, .(
  #   id,
  #   condition,
  #   Replicate,
  #   log2NInt,
  #   run_id
  # )]
  # pca_dt <- dcast(pca_dt, condition + Replicate ~ id, value.var = "log2NInt")
  #
  print("ERROR")
}

num_dimensions = min(pca_dt[, .N-1], 10)
res.pca <- PCA(pca_dt[, 3:ncol(pca_dt)], graph = FALSE, ncp = num_dimensions)
eig.val <- get_eigenvalue(res.pca)
eig.val <- data.table(dims = rownames(eig.val), eig.val)

samples.pca <- get_pca_ind(res.pca)
samples.coord <- data.table(pca_dt[, 1:2], samples.pca$coord)
samples.coord[, Run := str_c(condition, Replicate, sep = " - ")]

#if no fracs
samples.coord = merge(samples.coord, expdes, by.x = c("condition", "Replicate"), by.y = c("experiment", "Replicate"))

p <- ggplot(samples.coord, aes(x = Dim.1, y=Dim.2, colour=condition, fill=condition, label=Run)) +
  stat_ellipse(geom = "polygon", alpha=0.1) +
  geom_point(size = 3, alpha = 0.7) +
  theme_minimal() +
  scale_x_continuous(str_c("PCA 1 - ", eig.val[dims == "Dim.1", round(variance.percent,1)], "%")) +
  scale_y_continuous(str_c("PCA 2 - ", eig.val[dims == "Dim.2", round(variance.percent,1)], "%"))


if (output_format == "pdf"){
  p = p +   geom_label_repel(aes(label = file_name, fill = NULL),
                  box.padding   = 0.35, 
                  point.padding = 0.5,
                  segment.color = 'grey50',
                  show.legend = FALSE)
  p
} else {
  ggplotly(p, tooltip = c("label")) %>% config(displayModeBar = T, 
                                                  modeBarButtons = list(list('toImage')),
                                                  displaylogo = F)
}

p <- ggplot(eig.val[1:num_dimensions], aes(x=reorder(dims, -`variance.percent`), y=`variance.percent`)) +
  geom_bar(stat="identity", fill = "skyblue2") +
  theme_minimal() +
  geom_text_repel(aes(label=(round(`variance.percent`,1))), direction = 'y') +
  scale_x_discrete("PCA components") +
  scale_y_continuous("% Variance") +
  ggtitle("Scree plot")


if (output_format == "pdf"){
  p
} else {
  ggplotly(p, tooltip = c("y")) %>% config(displayModeBar = T, 
                                           modeBarButtons = list(list('toImage')),
                                           displaylogo = F)
}

name_of_column <- colnames(prot)[colnames(prot) == "adj.P.Val"]
if (!identical(name_of_column, character(0)) && prot[get(name_of_column) <= 0.05, .N] > 1) {
  DE_ids <- prot[get(name_of_column) <= 0.05, unique(id)]
  pca_dt <- prot_int[id %in% DE_ids, .(
    id,
    condition,
    Replicate,
    log2NInt
  )]
  pca_dt <- dcast(pca_dt, condition + Replicate ~ id, value.var = "log2NInt")

  num_dimensions = min(pca_dt[, .N-1], 10)
  res.pca <- PCA(pca_dt[, 3:ncol(pca_dt)], graph = FALSE, ncp = num_dimensions)
  eig.val <- get_eigenvalue(res.pca)
  eig.val <- data.table(dims = rownames(eig.val), eig.val)

  samples.pca <- get_pca_ind(res.pca)
  samples.coord <- data.table(pca_dt[, 1:2], samples.pca$coord)
  samples.coord[, Run := str_c(condition, Replicate, sep = " - ")]

  #if no fracs
  samples.coord = merge(samples.coord, expdes, by.x = c("condition", "Replicate"), by.y = c("experiment", "Replicate"))
  label = "file_name"

  p <- ggplot(samples.coord, aes(x = Dim.1, y=Dim.2, colour=condition, fill=condition, label=Run)) +
    stat_ellipse(geom = "polygon", alpha=0.1) +
    geom_point(size = 3, alpha = 0.7) +
    theme_minimal() +
    scale_x_continuous(str_c("PCA 1 - ", eig.val[dims == "Dim.1", round(variance.percent,1)], "%")) +
    scale_y_continuous(str_c("PCA 2 - ", eig.val[dims == "Dim.2", round(variance.percent,1)], "%")) +
    ggtitle(str_c("PCA plot using the ", length(DE_ids), " differentially expressed proteins"))


  if (output_format == "pdf"){
    p = p +   geom_label_repel(aes(label = file_name, fill = NULL),
                               box.padding   = 0.35, 
                               point.padding = 0.5,
                               segment.color = 'grey50',
                               show.legend = FALSE)
    p
  } else {
    ggplotly(p, tooltip = c("label")) %>% config(displayModeBar = T, 
                                                 modeBarButtons = list(list('toImage')),
                                                 displaylogo = F)
  }
}

Modification specific peptides

# Prepare PCA data table
# limiting the data by number of missing values as per input parameters:
# limitImputedRatio
# checkBy
if (!protein_only){
  if (checkBy == "Experiment") {
    pca_dt <- mod_pept_int[Valid == 1, .(
      id,
      condition,
      Replicate,
      log2NInt,
      run_id
    )]
    pca_dt <- dcast(pca_dt, condition + Replicate ~ id, value.var = "log2NInt")

  } else {
    # pca_dt <- mod_pept_int[Valid == 1, .(
    #   id,
    #   condition,
    #   Replicate,
    #   log2NInt,
    #   run_id
    # )]
    # pca_dt <- dcast(pca_dt, condition + Replicate ~ id, value.var = "log2NInt")
    #
    print("ERROR")
  }

  num_dimensions = min(pca_dt[, .N-1], 10)
  res.pca <- PCA(pca_dt[, 3:ncol(pca_dt)], graph = FALSE, ncp = num_dimensions)
  eig.val <- get_eigenvalue(res.pca)
  eig.val <- data.table(dims = rownames(eig.val), eig.val)

  samples.pca <- get_pca_ind(res.pca)
  samples.coord <- data.table(pca_dt[, 1:2], samples.pca$coord)
  samples.coord[, Run := str_c(condition, Replicate, sep = " - ")]

  #if no fracs
  samples.coord = merge(samples.coord, expdes, by.x = c("condition", "Replicate"), by.y = c("experiment", "Replicate"))
  label = "file_name"


  p <- ggplot(samples.coord, aes(x = Dim.1, y=Dim.2, colour=condition, fill=condition, label=Run)) +
    stat_ellipse(geom = "polygon", alpha=0.1) +
    geom_point(size = 3, alpha = 0.7) +
    theme_minimal() +
    scale_x_continuous(str_c("PCA 1 - ", eig.val[dims == "Dim.1", round(variance.percent,1)], "%")) +
    scale_y_continuous(str_c("PCA 2 - ", eig.val[dims == "Dim.2", round(variance.percent,1)], "%"))



  if (output_format == "pdf"){
    p = p +   geom_label_repel(aes(label = file_name, fill = NULL),
                               box.padding   = 0.35, 
                               point.padding = 0.5,
                               segment.color = 'grey50',
                               show.legend = FALSE)
    p
  } else {
    ggplotly(p, tooltip = c("label")) %>% config(displayModeBar = T, 
                                                 modeBarButtons = list(list('toImage')),
                                                 displaylogo = F)
  }

} else {
  print("Protein Only Analysis. This chunk was ignored")
}

if (!protein_only){
  scree_plot <- ggplot(eig.val[1:num_dimensions], aes(x=reorder(dims, -`variance.percent`), y=`variance.percent`)) +
    geom_bar(stat="identity", fill = "skyblue2") +
    theme_minimal() +
    geom_text_repel(aes(label=(round(`variance.percent`,1))), direction = 'y') +
    scale_x_discrete("PCA components") +
    scale_y_continuous("% Variance") +
    ggtitle("Scree plot")

  if (output_format == "pdf"){
    scree_plot
  } else {
    ggplotly(scree_plot, tooltip = c("y")) %>% config(displayModeBar = T, 
                                                      modeBarButtons = list(list('toImage')),
                                                      displaylogo = F)
  }
}

if (!protein_only){
  name_of_column <- colnames(mod_pept)[colnames(mod_pept) == "adj.P.Val"]
  if (!identical(name_of_column, character(0)) && mod_pept[get(name_of_column) <= 0.05, .N] > 1) {
    DE_ids <- mod_pept[get(name_of_column) <= 0.05, unique(id)]
    pca_dt <- mod_pept_int[id %in% DE_ids, .(
      id,
      condition,
      Replicate,
      log2NInt
    )]
    pca_dt <- dcast(pca_dt, condition + Replicate ~ id, value.var = "log2NInt")

    num_dimensions = min(pca_dt[, .N-1], 10)
    res.pca <- PCA(pca_dt[, 3:ncol(pca_dt)], graph = FALSE, ncp = num_dimensions)
    eig.val <- get_eigenvalue(res.pca)
    eig.val <- data.table(dims = rownames(eig.val), eig.val)

    samples.pca <- get_pca_ind(res.pca)
    samples.coord <- data.table(pca_dt[, 1:2], samples.pca$coord)
    samples.coord[, Run := str_c(condition, Replicate, sep = " - ")]


    #if no fracs
    samples.coord = merge(samples.coord, expdes, by.x = c("condition", "Replicate"), by.y = c("experiment", "Replicate"))
    label = "file_name"


    p <- ggplot(samples.coord, aes(x = Dim.1, y=Dim.2, colour=condition, fill=condition, label=Run)) +
      stat_ellipse(geom = "polygon", alpha=0.1) +
      geom_point(size = 3, alpha = 0.7) +
      theme_minimal() +
      scale_x_continuous(str_c("PCA 1 - ", eig.val[dims == "Dim.1", round(variance.percent,1)], "%")) +
      scale_y_continuous(str_c("PCA 2 - ", eig.val[dims == "Dim.2", round(variance.percent,1)], "%")) +
      ggtitle(str_c("PCA plot using the ", length(DE_ids), " differentially expressed peptides"))


    if (output_format == "pdf"){
      p = p +   geom_label_repel(aes(label = file_name, fill = NULL),
                                 box.padding   = 0.35, 
                                 point.padding = 0.5,
                                 segment.color = 'grey50',
                                 show.legend = FALSE)
      p
    } else {
      ggplotly(p, tooltip = c("label")) %>% config(displayModeBar = T, 
                                                   modeBarButtons = list(list('toImage')),
                                                   displaylogo = F)
    }
  }

}

Peptides

# Prepare PCA data table
# limiting the data by number of missing values as per input parameters:
# limitImputedRatio
# checkBy

if (!protein_only){

  if (checkBy == "Experiment") {
    pca_dt <- pept_int[Valid == 1, .(
      id,
      condition,
      Replicate,
      log2NInt,
      run_id
    )]
    pca_dt <- dcast(pca_dt, condition + Replicate ~ id, value.var = "log2NInt")

  } else {
    # pca_dt <- pept_int[Valid == 1, .(
    #   id,
    #   condition,
    #   Replicate,
    #   log2NInt,
    #   run_id
    # )]
    # pca_dt <- dcast(pca_dt, condition + Replicate ~ id, value.var = "log2NInt")
    #
    print("ERROR")
  }

  num_dimensions = min(pca_dt[, .N-1], 10)
  res.pca <- PCA(pca_dt[, 3:ncol(pca_dt)], graph = FALSE, ncp = num_dimensions)
  eig.val <- get_eigenvalue(res.pca)
  eig.val <- data.table(dims = rownames(eig.val), eig.val)

  samples.pca <- get_pca_ind(res.pca)
  samples.coord <- data.table(pca_dt[, 1:2], samples.pca$coord)
  samples.coord[, Run := str_c(condition, Replicate, sep = " - ")]

  #if no fracs
  samples.coord = merge(samples.coord, expdes, by.x = c("condition", "Replicate"), by.y = c("experiment", "Replicate"))
  label = "file_name"


  p <- ggplot(samples.coord, aes(x = Dim.1, y=Dim.2, colour=condition, fill=condition, label=Run)) +
    stat_ellipse(geom = "polygon", alpha=0.1) +
    geom_point(size = 3, alpha = 0.7) +
    theme_minimal() +
    scale_x_continuous(str_c("PCA 1 - ", eig.val[dims == "Dim.1", round(variance.percent,1)], "%")) +
    scale_y_continuous(str_c("PCA 2 - ", eig.val[dims == "Dim.2", round(variance.percent,1)], "%"))


  if (output_format == "pdf"){
    p = p +   geom_label_repel(aes(label = file_name, fill = NULL),
                               box.padding   = 0.35, 
                               point.padding = 0.5,
                               segment.color = 'grey50',
                               show.legend = FALSE)
    p
  } else {
    ggplotly(p, tooltip = c("label")) %>% config(displayModeBar = T, 
                                                 modeBarButtons = list(list('toImage')),
                                                 displaylogo = F)
  }
}

if (!protein_only){
  scree_plot <- ggplot(eig.val[1:num_dimensions], aes(x=reorder(dims, -`variance.percent`), y=`variance.percent`)) +
    geom_bar(stat="identity", fill = "skyblue2") +
    theme_minimal() +
    geom_text_repel(aes(label=(round(`variance.percent`,1))), direction = 'y') +
    scale_x_discrete("PCA components") +
    scale_y_continuous("% Variance") +
    ggtitle("Scree plot")

  if (output_format == "pdf"){
    scree_plot
  } else {
    ggplotly(scree_plot, tooltip = c("y")) %>% config(displayModeBar = T, 
                                                      modeBarButtons = list(list('toImage')),
                                                      displaylogo = F)
  }
}

if (!protein_only){
  name_of_column <- colnames(pept)[colnames(pept) == "adj.P.Val"]
  if (!identical(name_of_column, character(0)) && pept[get(name_of_column) <= 0.05, .N] > 1) {
    DE_ids <- pept[get(name_of_column) <= 0.05, unique(id)]
    pca_dt <- pept_int[id %in% DE_ids, .(
      id,
      condition,
      Replicate,
      log2NInt
    )]
    pca_dt <- dcast(pca_dt, condition + Replicate ~ id, value.var = "log2NInt")

    num_dimensions = min(pca_dt[, .N-1], 10)
    res.pca <- PCA(pca_dt[, 3:ncol(pca_dt)], graph = FALSE, ncp = num_dimensions)
    eig.val <- get_eigenvalue(res.pca)
    eig.val <- data.table(dims = rownames(eig.val), eig.val)

    samples.pca <- get_pca_ind(res.pca)
    samples.coord <- data.table(pca_dt[, 1:2], samples.pca$coord)
    samples.coord[, Run := str_c(condition, Replicate, sep = " - ")]



    #if no fracs
    samples.coord = merge(samples.coord, expdes, by.x = c("condition", "Replicate"), by.y = c("experiment", "Replicate"))
    label = "file_name"

    p <- ggplot(samples.coord, aes(x = Dim.1, y=Dim.2, colour=condition, fill=condition, label=Run)) +
      stat_ellipse(geom = "polygon", alpha=0.1) +
      geom_point(size = 3, alpha = 0.7) +
      theme_minimal() +
      scale_x_continuous(str_c("PCA 1 - ", eig.val[dims == "Dim.1", round(variance.percent,1)], "%")) +
      scale_y_continuous(str_c("PCA 2 - ", eig.val[dims == "Dim.2", round(variance.percent,1)], "%")) +
      ggtitle(str_c("PCA plot using the ", length(DE_ids), " differentially expressed peptides"))

    if (output_format == "pdf"){
      p = p +   geom_label_repel(aes(label = file_name, fill = NULL),
                                 box.padding   = 0.35, 
                                 point.padding = 0.5,
                                 segment.color = 'grey50',
                                 show.legend = FALSE)
      p
    } else {
      ggplotly(p, tooltip = c("label")) %>% config(displayModeBar = T, 
                                                   modeBarButtons = list(list('toImage')),
                                                   displaylogo = F)
    }

  }
}

\newpage

B. Quantitative values CV distributions {.tabset .tabset-fade .tabset-pills}

Coefficient of Variation Details

The Coefficient of Variation or Relative Standard Deviation, is calculated by the ratio of the standard deviation to the mean. It is used to measure the precision of a measure, in this case protein/peptide intensity. Here the CV is calculated within experimental conditions separately.

Proteins

cvdt <- prot_int[Imputed == 0][, countRep := .N, by = .(id, condition)]
cvdt[, countRepMax := max(countRep), by = .(id, condition)]
cvdt[, ReplicatePC := countRep/countRepMax]
cvdt[, intensity := as.double(intensity)]
cvdt <- cvdt[ReplicatePC >= 0.5, .(cv = sd(intensity)/mean(intensity)), by = .(id, condition)]

p <- ggplot(cvdt, aes(x=cv, fill=condition, colour=condition)) +
  geom_density(alpha=0.4) +
  theme_minimal() +
  scale_x_continuous("% CV", labels = scales::percent) +
  ggtitle("Proteins - LFQ intensity CV")


if (output_format == "pdf"){
  p
} else {
  ggplotly(p) %>% config(displayModeBar = T, 
                        modeBarButtons = list(list('toImage')),
                        displaylogo = F)
}

Modification specific peptides

if (!protein_only){
cvdt <- mod_pept_int[Imputed == 0][, countRep := .N, by = .(id, condition)]
cvdt[, countRepMax := max(countRep), by = .(id, condition)]
cvdt[, ReplicatePC := countRep/countRepMax]
cvdt[, intensity := as.double(intensity)]
cvdt <- cvdt[ReplicatePC >= 0.5, .(cv = sd(intensity)/mean(intensity)), by = .(id, condition)]

p <- ggplot(cvdt, aes(x=cv, fill=condition, colour=condition)) +
  geom_density(alpha=0.4) +
  theme_minimal() +
  scale_x_continuous("% CV", labels = scales::percent) +
  ggtitle("Modification specific peptides - intensity CV")


if (output_format == "pdf"){
  p
} else {
  ggplotly(p) %>% config(displayModeBar = T, 
                        modeBarButtons = list(list('toImage')),
                        displaylogo = F)
}
}

Peptides

if (!protein_only){
cvdt <- pept_int[Imputed == 0][, countRep := .N, by = .(id, condition)]
cvdt[, countRepMax := max(countRep), by = .(id, condition)]
cvdt[, ReplicatePC := countRep/countRepMax]
cvdt[, intensity := as.double(intensity)]
cvdt <- cvdt[ReplicatePC >= 0.5, .(cv = sd(intensity)/mean(intensity)), by = .(id, condition)]

p <- ggplot(cvdt, aes(x=cv, fill=condition, colour=condition)) +
  geom_density(alpha=0.4) +
  theme_minimal() +
  scale_x_continuous("% CV", labels = scales::percent) +
  ggtitle("Peptide - LFQ intensity CV")

if (output_format == "pdf"){
  p
} else {
  ggplotly(p) %>% config(displayModeBar = T, 
                        modeBarButtons = list(list('toImage')),
                        displaylogo = F)
}
}

\newpage

C. Sample and Intensity Correlation Plots {.tabset .tabset-fade .tabset-pills}

Sample and Intensity Correlation Details

Pairwise Intensity correlation of all the LC-MS runs are measures of the similarity between different samples or replicates. Samples belonging to the same experimental condition are expected to have higher correlation values than samples across different experimental conditions.

Proteins

if ("file_name" %in% colnames(prot_int)){
  filenames <- prot_int[!(is.na(file_name)), unique(run_id)]
} else {
  filenames <- prot_int[, unique(run_id)]
}

int_corr_dt <- dcast.data.table(
  prot_int[Imputed == 0],
  id ~ condition + Replicate,
  value.var = "log2NInt",
  sep = "_"
)

# must have >4 observations for spearman correlation to run
test = try(rcorr(as.matrix(int_corr_dt[, 2:ncol(int_corr_dt)])), silent = TRUE)

if (class(test) != "try-error") {

  DT_corMatrix <- rcorr(as.matrix(int_corr_dt[, 2:ncol(int_corr_dt)]))
  DT_corMatrix <- DT_corMatrix$r
  DT_corMatrix[DT_corMatrix <= -1] = -1
  DT_corMatrix[DT_corMatrix >= 1] = 1
  #corrplot(DT_corMatrix, type = "upper", title = "Correlation Matrix", tl.cex = 0.5, mar = c(1,0,1.5,0))
  if (num_files > 10) {
    corrplot(DT_corMatrix, type = "upper", title = "Correlation Matrix", tl.cex = 0.5, mar = c(1,0,1.5,0))

  } else {
    corrplot(DT_corMatrix, type = "upper", title = "Correlation Matrix", tl.cex = 0.5, mar = c(1,0,1.5,0), addCoef.col = "white", number.cex = .5)

  }

  name_of_column <- colnames(prot)[colnames(prot) == "adj.P.Val"]
  if (!identical(name_of_column, character(0)) && prot[get(name_of_column) <= 0.05, .N] > 4) {
    # DE_ids <- prot[get(name_of_column) <= 0.05, unique(ProteinGroupId)]
    DE_ids <- prot[get(name_of_column) <= 0.05, unique(id)]
    int_corr_dt <- int_corr_dt[id %in% DE_ids]

    test = try(rcorr(as.matrix(int_corr_dt[, 2:ncol(int_corr_dt)])), silent = TRUE)

    if (class(test) != "try-error"){
      DT_corMatrix <- rcorr(as.matrix(int_corr_dt[, 2:ncol(int_corr_dt)]))
      DT_corMatrix <- DT_corMatrix$r

      DT_corMatrix[DT_corMatrix <= -1] = -1
      DT_corMatrix[DT_corMatrix >= 1] = 1

      if (num_files > 10) {
        corrplot(DT_corMatrix, type = "upper", title = "Correlation Matrix - DE proteins", tl.cex = 0.5, mar = c(1,0,1.5,0))

      } else {
        corrplot(DT_corMatrix, type = "upper", title = "Correlation Matrix - DE proteins", tl.cex = 0.5, mar = c(1,0,1.5,0), addCoef.col = "white", number.cex = .5)

      }
    }
  }




  if (length(filenames) > 4) {
    filenames <- sample(filenames, 4)
  }

  int_corr_dt <- dcast.data.table(
    prot_int[run_id %in% filenames & Imputed == 0],
    id ~ condition + Replicate,
    value.var = "log2NInt",
    sep = "_"
  )

  myplot <- function(data, mapping, ...) {
    ggplot(data=data, mapping=mapping) +
      geom_point(size = 1, alpha = 0.3, colour = "skyblue2") +
      theme_minimal()
  }
  p <- ggpairs(int_corr_dt, columns = colnames(int_corr_dt)[2:ncol(int_corr_dt)],
               upper = list(
                 continuous = myplot
               ),
               lower = "blank",
               diag = NULL
  )
  p

}

Modification specific peptides

if (!protein_only){
  if ("file_name" %in% colnames(prot_int)){
    filenames <- prot_int[!(is.na(file_name)), unique(run_id)]
  } else {
    filenames <- prot_int[, unique(run_id)]
  }

  int_corr_dt <- dcast.data.table(
    mod_pept_int[Imputed == 0],
    id ~ condition + Replicate,
    value.var = "log2NInt",
    sep = "_"
  )

  test = try(rcorr(as.matrix(int_corr_dt[, 2:ncol(int_corr_dt)])), silent = TRUE)

  if (class(test) != "try-error") {


    DT_corMatrix <- rcorr(as.matrix(int_corr_dt[, 2:ncol(int_corr_dt)]))
    DT_corMatrix <- DT_corMatrix$r
    DT_corMatrix[DT_corMatrix <= -1] = -1
    DT_corMatrix[DT_corMatrix >= 1] = 1
    #corrplot(DT_corMatrix, type = "upper", title = "Correlation Matrix", tl.cex = 0.5, mar = c(1,0,1.5,0))
    if (num_files > 10) {
      corrplot(DT_corMatrix, type = "upper", title = "Correlation Matrix", tl.cex = 0.5, mar = c(1,0,1.5,0))

    } else {
      corrplot(DT_corMatrix, type = "upper", title = "Correlation Matrix", tl.cex = 0.5, mar = c(1,0,1.5,0), addCoef.col = "white", number.cex = .5)

    }

    name_of_column <- colnames(mod_pept)[colnames(mod_pept) == "adj.P.Val"]
    if (!identical(name_of_column, character(0)) && mod_pept[get(name_of_column) <= 0.05, .N] > 4) {
      # DE_ids <- prot[get(name_of_column) <= 0.05, unique(ProteinGroupId)]
      DE_ids <- mod_pept[get(name_of_column) <= 0.05, unique(id)]
      int_corr_dt <- int_corr_dt[id %in% DE_ids]

      test = try(rcorr(as.matrix(int_corr_dt[, 2:ncol(int_corr_dt)])), silent = TRUE)

      if (class(test) != "try-error") {
        DT_corMatrix <- rcorr(as.matrix(int_corr_dt[, 2:ncol(int_corr_dt)]))
        DT_corMatrix <- DT_corMatrix$r

        DT_corMatrix[DT_corMatrix <= -1] = -1
        DT_corMatrix[DT_corMatrix >= 1] = 1

        if (num_files > 10) {
          corrplot(DT_corMatrix, type = "upper", title = "Correlation Matrix - DE peptides", tl.cex = 0.5, mar = c(1,0,1.5,0))

        } else {
          corrplot(DT_corMatrix, type = "upper", title = "Correlation Matrix - DE peptides", tl.cex = 0.5, mar = c(1,0,1.5,0), addCoef.col = "white", number.cex = .5)

        }
      }

    }



    if (length(filenames) > 4) {
      filenames <- sample(filenames, 4)
    }

    int_corr_dt <- dcast.data.table(
      mod_pept_int[run_id %in% filenames & Imputed == 0],
      id ~ condition + Replicate,
      value.var = "log2NInt",
      sep = "_"
    )

    myplot <- function(data, mapping, ...) {
      ggplot(data=data, mapping=mapping) +
        geom_point(size = 1, alpha = 0.3, colour = "skyblue2") +
        theme_minimal()
    }
    p <- ggpairs(int_corr_dt, columns = colnames(int_corr_dt)[2:ncol(int_corr_dt)],
                 upper = list(
                   continuous = myplot
                 ),
                 lower = "blank",
                 diag = NULL
    )
    p
  }
}

Peptides

if (!protein_only){
  if ("file_name" %in% colnames(prot_int)){
    filenames <- prot_int[!(is.na(file_name)), unique(run_id)]
  } else {
    filenames <- prot_int[, unique(run_id)]
  }

  int_corr_dt <- dcast.data.table(
    pept_int[Imputed == 0],
    id ~ condition + Replicate,
    value.var = "log2NInt",
    sep = "_"
  )


  test = try(rcorr(as.matrix(int_corr_dt[, 2:ncol(int_corr_dt)])), silent = TRUE)

  if (class(test) != "try-error"){

    DT_corMatrix <- rcorr(as.matrix(int_corr_dt[, 2:ncol(int_corr_dt)]))
    DT_corMatrix <- DT_corMatrix$r
    DT_corMatrix[DT_corMatrix <= -1] = -1
    DT_corMatrix[DT_corMatrix >= 1] = 1
    #corrplot(DT_corMatrix, type = "upper", title = "Correlation Matrix", tl.cex = 0.5, mar = c(1,0,1.5,0))
    if (num_files > 10) {
      corrplot(DT_corMatrix, type = "upper", title = "Correlation Matrix", tl.cex = 0.5, mar = c(1,0,1.5,0))

    } else {
      corrplot(DT_corMatrix, type = "upper", title = "Correlation Matrix", tl.cex = 0.5, mar = c(1,0,1.5,0), addCoef.col = "white", number.cex = .5)

    }

    name_of_column <- colnames(pept)[colnames(pept) == "adj.P.Val"]
    if (!identical(name_of_column, character(0)) && mod_pept[get(name_of_column) <= 0.05, .N] > 4) {
      # DE_ids <- prot[get(name_of_column) <= 0.05, unique(ProteinGroupId)]
      DE_ids <- pept[get(name_of_column) <= 0.05, unique(id)]
      int_corr_dt <- int_corr_dt[id %in% DE_ids]

      test = try(rcorr(as.matrix(int_corr_dt[, 2:ncol(int_corr_dt)])), silent = TRUE)

      if (class(test) != "try-error"){
        DT_corMatrix <- rcorr(as.matrix(int_corr_dt[, 2:ncol(int_corr_dt)]))
        DT_corMatrix <- DT_corMatrix$r

        DT_corMatrix[DT_corMatrix <= -1] = -1
        DT_corMatrix[DT_corMatrix >= 1] = 1

        if (num_files > 10) {
          corrplot(DT_corMatrix, type = "upper", title = "Correlation Matrix - DE peptides", tl.cex = 0.5, mar = c(1,0,1.5,0))

        } else {
          corrplot(DT_corMatrix, type = "upper", title = "Correlation Matrix - DE peptides", tl.cex = 0.5, mar = c(1,0,1.5,0), addCoef.col = "white", number.cex = .5)

        }
      }

    }




    if (length(filenames) > 4) {
      filenames <- sample(filenames, 4)
    }

    int_corr_dt <- dcast.data.table(
      pept_int[run_id %in% filenames & Imputed == 0],
      id ~ condition + Replicate,
      value.var = "log2NInt",
      sep = "_"
    )

    myplot <- function(data, mapping, ...) {
      ggplot(data=data, mapping=mapping) +
        geom_point(size = 1, alpha = 0.3, colour = "skyblue2") +
        theme_minimal()
    }
    p <- ggpairs(int_corr_dt, columns = colnames(int_corr_dt)[2:ncol(int_corr_dt)],
                 upper = list(
                   continuous = myplot
                 ),
                 lower = "blank",
                 diag = NULL
    )
    p
  }
}

\newpage

2. Feature Completeness

A. Data completeness {.tabset .tabset-fade .tabset-pills}

Data Completeness Details

The quantitative analysis relies on the comparison of intensities corresponding to the same protein/peptide across multiple samples; the ideal situation is when a protein is measured in all the samples (100%).

In the first plot we show a frequency plot that measures the percentage of samples where each protein/peptide was detected. The presence of a large number of proteins/peptides at low percentage values could signify an issue with the sample acquisition.

In the second plot we show the percentage of proteins/peptides detected in each LC-MS/MS run. Ideally all the samples should be clustered together, the presence of samples at low percentage values could signify an issue during the acquisition of that specific sample. If the samples are clustered depending on the experimental condition it could signify a systematic bias.

\newpage

Proteins

dt <- unique(prot_int[Imputed == 0, .(ReplicatePC = .N/num_files), by = .(id)])

ymax <- max(dt[, .N, by = .(ReplicatePC)][, N])
ymax <- 0.01 * ymax + ymax

p <- ggplot(dt, aes(x = ReplicatePC)) +
  annotate('rect', xmin = 0.7, xmax = 1.05, ymin = 0, ymax = ymax, alpha=0.2)  +
  geom_histogram(binwidth = max(0.1, round(1/max(num_files), 2)), fill="skyblue2") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.2),
        panel.grid.major.y = element_blank(),
        panel.border = element_blank(),
        axis.ticks.y = element_blank()
  ) +
  scale_x_continuous("Percentage of measurements per protein", labels = scales::percent, limits = c(0, 1.15), breaks = seq(0, 1, 0.1)) +
  annotate('text', x = 0.5, y = 0.8*ymax, label=str_c("Number of proteins with\nmissing values <= 30%:\n ", dt[ReplicatePC >= 0.7, .N]))



if (output_format == "pdf"){
  p
} else {
  ggplotly(p, tooltip = c("y")) %>% config(displayModeBar = T, 
                                               modeBarButtons = list(list('toImage')),
                                               displaylogo = F)
}

dt <- prot_int[Imputed == 0, .(`% measured values` = .N/prot_int[, length(unique(id))]), by = .(run_id, condition, Replicate)]
dt[, Run := str_c(condition, Replicate, sep = " - ")]

dt[, `% measured values` := 100*(round(`% measured values`, 2))]

p<- ggplot(dt, aes(y = `% measured values`, x = Run, 
                   color = condition, fill = condition)) +
  geom_bar(stat ="identity", position = "dodge")+
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.2),
        panel.grid.major.y = element_blank(),
        panel.border = element_blank(),
        axis.ticks.y = element_blank()
  ) +
  scale_x_discrete("Condition - Replicate")



if (output_format == "pdf"){
  p
} else {
  ggplotly(p, tooltip = c("x","y")) %>% config(displayModeBar = T, 
                                               modeBarButtons = list(list('toImage')),
                                               displaylogo = F)
}

Modification specific peptides

if (!protein_only){
  dt <- unique(mod_pept_int[Imputed == 0, .(ReplicatePC = .N/num_files), by = .(id)])

  ymax <- max(dt[, .N, by = .(ReplicatePC)][, N])
  ymax <- 0.01 * ymax + ymax

  p <- ggplot(dt, aes(x = ReplicatePC)) +
    annotate('rect', xmin = 0.7, xmax = 1.05, ymin = 0, ymax = ymax, alpha=0.2)  +
    geom_histogram(binwidth = max(0.1, round(1/max(num_files), 2)), fill="skyblue2") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.2),
          panel.grid.major.y = element_blank(),
          panel.border = element_blank(),
          axis.ticks.y = element_blank()
    ) +
    scale_x_continuous("Percentage of measurements per peptide", labels = scales::percent, limits = c(0, 1.15), breaks = seq(0, 1, 0.1)) +
    annotate('text', x = 0.5, y = 0.8*ymax, label=str_c("Number of peptides with\nmissing values <= 30%:\n ", dt[ReplicatePC >= 0.7, .N]))


  if (output_format == "pdf"){
    p
  } else {
    ggplotly(p, tooltip = c("y")) %>% config(displayModeBar = T, 
                                                 modeBarButtons = list(list('toImage')),
                                                 displaylogo = F)
  }

}

if (!protein_only){
  dt <- mod_pept_int[Imputed == 0, .(`% measured values` = .N/mod_pept_int[, length(unique(id))]), by = .(run_id, condition, Replicate)]
  dt[, Run := str_c(condition, Replicate, sep = " - ")]

  dt[, `% measured values` := 100*(round(`% measured values`, 2))]

  p<- ggplot(dt, aes(y = `% measured values`, x = Run, 
                     color = condition, fill = condition)) +
    geom_bar(stat ="identity", position = "dodge")+
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.2),
          panel.grid.major.y = element_blank(),
          panel.border = element_blank(),
          axis.ticks.y = element_blank()
    ) +
    scale_x_discrete("Condition - Replicate")


  if (output_format == "pdf"){
    p
  } else {
    ggplotly(p, tooltip = c("x","y")) %>% config(displayModeBar = T, 
                                                 modeBarButtons = list(list('toImage')),
                                                 displaylogo = F)
  }
}

Peptides

if (!protein_only){
  dt <- unique(pept_int[Imputed == 0, .(ReplicatePC = .N/num_files), by = .(id)])

  ymax <- max(dt[, .N, by = .(ReplicatePC)][, N])
  ymax <- 0.01 * ymax + ymax

  p <- ggplot(dt, aes(x = ReplicatePC)) +
    annotate('rect', xmin = 0.7, xmax = 1.05, ymin = 0, ymax = ymax, alpha=0.2)  +
    geom_histogram(binwidth = max(0.1, round(1/max(num_files), 2)), fill="skyblue2") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.2),
          panel.grid.major.y = element_blank(),
          panel.border = element_blank(),
          axis.ticks.y = element_blank()
    ) +
    scale_x_continuous("Percentage of measurements per peptide", labels = scales::percent, limits = c(0, 1.15), breaks = seq(0, 1, 0.1)) +
    annotate('text', x = 0.5, y = 0.8*ymax, label=str_c("Number of peptides with\nmissing values <= 30%:\n ", dt[ReplicatePC >= 0.7, .N]))


  if (output_format == "pdf"){
    p
  } else {
    ggplotly(p, tooltip = c("y")) %>% config(displayModeBar = T, 
                                             modeBarButtons = list(list('toImage')),
                                             displaylogo = F)
  }
}

if (!protein_only){
  dt <- pept_int[Imputed == 0, .(`% measured values` = .N/pept_int[, length(unique(id))]), by = .(run_id, condition, Replicate)]
  dt[, Run := str_c(condition, Replicate, sep = " - ")]

  dt[, `% measured values` := 100*(round(`% measured values`, 2))]

  p<- ggplot(dt, aes(y = `% measured values`, x = Run, 
                     color = condition, fill = condition)) +
    geom_bar(stat ="identity", position = "dodge")+
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.2),
          panel.grid.major.y = element_blank(),
          panel.border = element_blank(),
          axis.ticks.y = element_blank()
    ) +
    scale_x_discrete("Condition - Replicate")



  if (output_format == "pdf"){
    p
  } else {
    ggplotly(p, tooltip = c("x","y")) %>% config(displayModeBar = T, 
                                                 modeBarButtons = list(list('toImage')),
                                                 displaylogo = F)
  }
}

\newpage

3. Sample preparation

Digestion efficiency

Missed Cleavages Details

Accurate quantitative analysis relies on consistent sample preparation. Digestion efficiency is one source of variability between samples. This plot shows the proportion of missed cleavage events in each sample. The ideal scenario would be to have a very low proportion of missed cleavages values greater than 0 and for the proportions to be consistent across samples. Any discrepancy between samples or experimental conditions could indicate issues in the sample preparation.

if (!protein_only){

dt <- copy(evidence)

setnames(dt, "experiment", "condition")
dt[is.na(condition), condition := "Library"]
dt[condition == "Library" & is.na(Replicate), Replicate := .GRP, by = .(`raw file`)]


if (!("fraction" %in% colnames(expdes))) {
  dt <- dt[, .(percent = .N), by = .(`missed cleavages`, condition, Replicate, fraction, `raw file`)]
  dt[, Run := str_c(condition, Replicate, fraction, sep = " - ")]
  xlabel = "Condition - Replicate - Fraction"

} else {
  dt <- dt[, .(percent = .N), by = .(`missed cleavages`, condition, Replicate, `raw file`)]
  dt[, Run := str_c(condition, Replicate, sep = " - ")]
  xlabel = "Condition - Replicate"

}
dt[, `missed cleavages` := as.factor(`missed cleavages`)]

p <- ggplot(dt, aes(x = Run, y = percent,  fill=`missed cleavages`, label=Run)) +
  geom_bar(stat="identity", position=position_fill(reverse = TRUE)) +
  theme_minimal() +
  scale_y_continuous("% missed cleavages", labels = scales::percent) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.2),
        panel.grid.major.y = element_blank(),
        panel.border = element_blank(),
        axis.ticks.y = element_blank()
        ) +
  scale_x_discrete(xlabel)




if (all(is.na(dt$`missed cleavages`))) { 
  cat("'missed cleavage' column is empty. Can't estimate digestion efficiency.")
} else{
  if (output_format == "pdf"){
    p
  } else {
    ggplotly(p, tooltip = c("x","y")) %>% config(displayModeBar = T, 
                                                 modeBarButtons = list(list('toImage')),
                                                 displaylogo = F)
  }
}
}

\newpage

4. Identifications

A. Counts {.tabset .tabset-fade .tabset-pills}

Identifications Details

Identifications of PSM’s (peptide spectral matches), peptides and proteins are a measure of the number of inferences we make at each of those levels as a result of each run.

Low counts in a run may suggest a systematic flaw in the experiment that needs to be addressed prior to interpretation.

Counts are extracted from your data in two ways. If your data has fractions, at the fraction level from evidence.txt or msms.txt and at the replicate level from the corresponding file (eg: proteinGroups.txt for proteins.)

\newpage

Proteins

all - from evidence.txt

\hfill\break

if (!protein_only){
dt <- copy(evidence)

setnames(dt, "experiment", "condition")
dt[is.na(condition), condition := "Library"]
dt[condition == "Library" & is.na(Replicate), Replicate := .GRP, by = .(`raw file`)]



if (!("fraction" %in% colnames(expdes))) {
  dt <- unique(dt[, .(`leading razor protein`, condition, Replicate, fraction, `raw file`)])[, .N, by = .(condition, Replicate, fraction, `raw file`)]
  dt[, Run := str_c(condition, Replicate, fraction, sep = " - ")]
  xlabel = "Condition - Replicate - Fraction"
} else {
  dt <- unique(dt[, .(`leading razor protein`, condition, Replicate,  `raw file`)])[, .N, by = .(condition, Replicate, `raw file`)]
  dt[, Run := str_c(condition, Replicate, sep = " - ")]
  xlabel = "Condition - Replicate"
}

p <- ggplot(dt, aes(x = as.factor(Run), y = N, color = condition, label=`raw file`)) +
  geom_segment( aes(x=as.factor(Run), xend=as.factor(Run), y=0, yend=N), color="skyblue") +
  geom_point(size=2, alpha=0.9) +
  coord_flip() +
  theme_minimal() +
  scale_x_discrete(xlabel) +
  scale_y_continuous("# Identifications") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.2),
        panel.grid.major.y = element_blank(),
        panel.border = element_blank(),
        axis.ticks.y = element_blank()
        )




if (output_format == "pdf"){
  p
} else {
  ggplotly(p, tooltip = c("label","y")) %>% config(displayModeBar = T, 
                                               modeBarButtons = list(list('toImage')),
                                               displaylogo = F)
}
}

With intensity values - from proteinGroups.txt

\hfill\break

dt <- copy(prot_int[Imputed == 0])

dt[is.na(condition), condition := "Library"]


# Protein groups will always have grouped over fractions
dt <- dt[, .N, by = .(condition, Replicate)]
dt[, Run := str_c(condition, Replicate, sep = " - ")]

p <- ggplot(dt, aes(x = as.factor(Run), y = N, color = condition, label=Replicate)) +
  geom_segment( aes(x=as.factor(Run), xend=as.factor(Run), y=0, yend=N), color="skyblue") +
  geom_point(size=2, alpha=0.9) +
  coord_flip() +
  theme_minimal() +
  scale_x_discrete("Condition - Replicate") +
  scale_y_continuous("# Identifications") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.2),
        panel.grid.major.y = element_blank(),
        panel.border = element_blank(),
        axis.ticks.y = element_blank()
        )



if (output_format == "pdf"){
  p
} else {
  ggplotly(p, tooltip = c("label","y")) %>% config(displayModeBar = T, 
                                               modeBarButtons = list(list('toImage')),
                                               displaylogo = F)
}

\newpage

Modification specific peptides

all - from evidence.txt

\hfill\break

#dt <- merge(msms, unique(feature_quant_dt[, .(file_name, condition, Replicate)]), by = "file_name", all.x = T)
if (!protein_only){
dt <- copy(evidence)


setnames(dt, "experiment", "condition")
dt[is.na(condition), condition := "Library"]
dt[condition == "Library" & is.na(Replicate), Replicate := .GRP, by = .(`raw file`)]

if (!("fraction" %in% colnames(expdes))) {
  dt <- unique(dt[, .(sequence, modifications, condition, Replicate, fraction, `raw file`)])[, .N, by = .(condition, Replicate, fraction, `raw file`)]
  dt[, Run := str_c(condition, Replicate, fraction, sep = " - ")]
  xlabel = "Condition - Replicate - Fraction"
} else {
  dt <- unique(dt[, .(sequence, modifications, condition, Replicate, `raw file`)])[, .N, by = .(condition, Replicate, `raw file`)]
  dt[, Run := str_c(condition, Replicate, sep = " - ")]
  xlabel = "Condition - Replicate"
}

p <- ggplot(dt, aes(x = as.factor(Run), y = N, color = condition, label=`raw file`)) +
  geom_segment( aes(x=as.factor(Run), xend=as.factor(Run), y=0, yend=N), color="skyblue") +
  geom_point(size=2, alpha=0.9) +
  coord_flip() +
  theme_minimal() +
  scale_x_discrete(xlabel) +
  scale_y_continuous("# Modified Sequences") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.2),
        panel.grid.major.y = element_blank(),
        panel.border = element_blank(),
        axis.ticks.y = element_blank()
        )


if (output_format == "pdf"){
  p
} else {
  ggplotly(p, tooltip = c("label","y")) %>% config(displayModeBar = T, 
                                               modeBarButtons = list(list('toImage')),
                                               displaylogo = F)
}
}

\hfill\break \newpage

With intensity values - from modificationSpecificPeptides.txt

\hfill\break

if (!protein_only){
dt <- copy(mod_pept_int[Imputed == 0])

dt[is.na(condition), condition := "Library"]

# modificationSpecificPeptides.txt will always have grouped over fractions
dt <- dt[, .N, by = .(condition, Replicate)]
dt[, Run := str_c(condition, Replicate, sep = " - ")]

p <- ggplot(dt, aes(x = as.factor(Run), y = N, color = condition, label=Replicate)) +
  geom_segment( aes(x=as.factor(Run), xend=as.factor(Run), y=0, yend=N), color="skyblue") +
  geom_point(size=2, alpha=0.9) +
  coord_flip() +
  theme_minimal() +
  scale_x_discrete("Condition - Replicate") +
  scale_y_continuous("# Modified Sequences") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.2),
        panel.grid.major.y = element_blank(),
        panel.border = element_blank(),
        axis.ticks.y = element_blank()
        )



if (output_format == "pdf"){
  p
} else {
  ggplotly(p, tooltip = c("label","y")) %>% config(displayModeBar = T, 
                                               modeBarButtons = list(list('toImage')),
                                               displaylogo = F)
}
}

\hfill\break \newpage

Peptides

all - from evidence.txt

\hfill\break

if (!protein_only){
dt <- copy(evidence)

setnames(dt, "experiment", "condition")
dt[is.na(condition), condition := "Library"]
dt[condition == "Library" & is.na(Replicate), Replicate := .GRP, by = .(`raw file`)]


if (!("fraction" %in% colnames(expdes))) {
  dt <- unique(dt[, .(sequence, condition, Replicate, fraction, `raw file`)])[, .N, by = .(condition, Replicate, fraction, `raw file`)]
  dt[, Run := str_c(condition, Replicate, fraction, sep = " - ")]
  xlabel = "Condition - Replicate - Fraction"
} else {
  dt <- unique(dt[, .(sequence, condition, Replicate, `raw file`)])[, .N, by = .(condition, Replicate, `raw file`)]
  dt[, Run := str_c(condition, Replicate, sep = " - ")]
  xlabel = "Condition - Replicate"
}

p <- ggplot(dt, aes(x = as.factor(Run), y = N, color = condition, label=`raw file`)) +
  geom_segment( aes(x=as.factor(Run), xend=as.factor(Run), y=0, yend=N), color="skyblue") +
  geom_point(size=2, alpha=0.9) +
  coord_flip() +
  theme_minimal() +
  scale_x_discrete(xlabel) +
  scale_y_continuous("# Peptide Sequences") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.2),
        panel.grid.major.y = element_blank(),
        panel.border = element_blank(),
        axis.ticks.y = element_blank()
  )




if (output_format == "pdf"){
  p
} else {
  ggplotly(p, tooltip = c("label","y")) %>% config(displayModeBar = T, 
                                               modeBarButtons = list(list('toImage')),
                                               displaylogo = F)
}
}

\hfill\break

With intensity values - from peptides.txt

\hfill\break

if (!protein_only){
  dt <- copy(pept_int[Imputed == 0])

dt[is.na(condition), condition := "Library"]


dt <- dt[, .N, by = .(condition, Replicate)]
dt[, Run := str_c(condition, Replicate, sep = " - ")]

p <- ggplot(dt, aes(x = as.factor(Run), y = N, color = condition, label=Replicate)) +
  geom_segment( aes(x=as.factor(Run), xend=as.factor(Run), y=0, yend=N), color="skyblue") +
  geom_point(size=2, alpha=0.9) +
  coord_flip() +
  theme_minimal() +
  scale_x_discrete("Condition - Replicate") +
  scale_y_continuous("# Peptides") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.2),
        panel.grid.major.y = element_blank(),
        panel.border = element_blank(),
        axis.ticks.y = element_blank()
        )



if (output_format == "pdf"){
  p
} else {
  ggplotly(p, tooltip = c("label","y")) %>% config(displayModeBar = T, 
                                               modeBarButtons = list(list('toImage')),
                                               displaylogo = F)
}
}

\hfill\break \newpage

PSMs

\hfill\break

if (!protein_only){
dt <- copy(msms)
setnames(dt, "experiment", "condition")

# check if there are any files in the msms.txt not included in the experiment design
dt[is.na(condition), condition := "Library"]
dt[condition == "Library" & is.na(Replicate), Replicate := .GRP, by = .(`raw file`)]


if (!("fraction" %in% colnames(expdes))) {
  dt <- dt[, .N, by = .(condition, Replicate, fraction, `raw file`)]
  dt[, Run := str_c(condition, Replicate, fraction, sep = " - ")]
  xlabel = "Condition - Replicate - Fraction"
} else {
  dt <- dt[, .N, by = .(condition, Replicate, `raw file`)]
  dt[, Run := str_c(condition, Replicate, sep = " - ")]
  xlabel = "Condition - Replicate"
}

p <- ggplot(dt, aes(x = as.factor(Run), y = N, color = condition, label=`raw file`)) +
  geom_segment( aes(x=as.factor(Run), xend=as.factor(Run), y=0, yend=N), color="skyblue") +
  geom_point(size=2, alpha=0.9) +
  coord_flip() +
  theme_minimal() +
  scale_x_discrete(xlabel) +
  scale_y_continuous("# PSMs") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.2),
        panel.grid.major.y = element_blank(),
        panel.border = element_blank(),
        axis.ticks.y = element_blank()
        )


if (output_format == "pdf"){
  p
} else {
  ggplotly(p, tooltip = c("label","y")) %>% config(displayModeBar = T, 
                                               modeBarButtons = list(list('toImage')),
                                               displaylogo = F)
}
}

MassDynamics/lfq_processing documentation built on May 4, 2023, 11:20 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

MassDynamics/lfq_processing
Processes Proteomics Files for Quantification.

In MassDynamics/lfq_processing: Processes Proteomics Files for Quantification.

1. Experiment Health

A. Principal Component Analysis (PCA) and Dimensionality Reduction {.tabset .tabset-fade .tabset-pills}

Proteins

Modification specific peptides

Peptides

B. Quantitative values CV distributions {.tabset .tabset-fade .tabset-pills}

Proteins

Modification specific peptides

Peptides

C. Sample and Intensity Correlation Plots {.tabset .tabset-fade .tabset-pills}

Proteins

Modification specific peptides

Peptides

2. Feature Completeness

A. Data completeness {.tabset .tabset-fade .tabset-pills}

Proteins

Modification specific peptides

Peptides

3. Sample preparation

Digestion efficiency

4. Identifications

A. Counts {.tabset .tabset-fade .tabset-pills}

Proteins

all - from evidence.txt

With intensity values - from proteinGroups.txt

Modification specific peptides

all - from evidence.txt

With intensity values - from modificationSpecificPeptides.txt

Peptides

all - from evidence.txt

With intensity values - from peptides.txt

PSMs

R Package Documentation

Browse R Packages

We want your feedback!

MassDynamics/lfq_processing Processes Proteomics Files for Quantification.

In MassDynamics/lfq_processing: Processes Proteomics Files for Quantification.

1. Experiment Health

A. Principal Component Analysis (PCA) and Dimensionality Reduction {.tabset .tabset-fade .tabset-pills}

Proteins

Modification specific peptides

Peptides

B. Quantitative values CV distributions {.tabset .tabset-fade .tabset-pills}

Proteins

Modification specific peptides

Peptides

C. Sample and Intensity Correlation Plots {.tabset .tabset-fade .tabset-pills}

Proteins

Modification specific peptides

Peptides

2. Feature Completeness

A. Data completeness {.tabset .tabset-fade .tabset-pills}

Proteins

Modification specific peptides

Peptides

3. Sample preparation

Digestion efficiency

4. Identifications

A. Counts {.tabset .tabset-fade .tabset-pills}

Proteins

all - from evidence.txt

With intensity values - from proteinGroups.txt

Modification specific peptides

all - from evidence.txt

With intensity values - from modificationSpecificPeptides.txt

Peptides

all - from evidence.txt

With intensity values - from peptides.txt

PSMs

R Package Documentation

Browse R Packages

We want your feedback!

MassDynamics/lfq_processing
Processes Proteomics Files for Quantification.