# Report-wide knitr chunk options: the figure caption option is set to NULL
# and figure files are written under the `output_figure` report parameter.
knitr::opts_chunk$set(
  fig.cap = NULL,
  fig.path = params$output_figure
)

library(data.table)
library(ggplot2)
library(ggrepel)
library(GGally)
library(umap)
library(FactoMineR)
library(factoextra)
library(corrplot)
library(viridis)
library(ggpubr)
library(Hmisc)
library(plotly)
library(stringr)
library(bit64)

# Number of rows in the experiment design, and the number of design rows per
# experiment. The grouping column is exposed as `condition` so the table can
# be merged with the intensity tables on that name.
num_files <- nrow(expdes)
run_per_condition <- expdes[
  ,
  .(countRepMax = .N),
  by = .(condition = experiment)
]

# When the design carries no file_name column, build one from the experiment
# and Replicate columns.
if (!("file_name" %in% names(expdes))) {
  expdes$file_name <- paste(expdes$experiment, " - ", expdes$Replicate)
}


# Flag modified peptides as Valid (1L) when, in at least one condition, the
# number of non-imputed rows is at least 50% of that condition's design rows
# (countRepMax); every other id gets Valid = 0L.
mod_pept_int_rep <- mod_pept_int[
  Imputed == 0,
  .(Repcount = .N),
  by = .(id, condition)
]
mod_pept_int_rep <- merge(
  run_per_condition,
  mod_pept_int_rep,
  by = "condition"
)
mod_pept_int_rep[, repPC := Repcount / countRepMax]
mod_pept_id_in_a_cond <- unique(mod_pept_int_rep[repPC >= 0.5, id])
mod_pept_int[, Valid := as.integer(id %in% mod_pept_id_in_a_cond)]

# Drop the temporaries so they do not linger in the report environment.
rm(mod_pept_id_in_a_cond, mod_pept_int_rep)

# Flag peptides as Valid (1L) when, in at least one condition, the number of
# non-imputed rows is at least 50% of that condition's design rows
# (countRepMax); every other id gets Valid = 0L.
pept_int_rep <- pept_int[
  Imputed == 0,
  .(Repcount = .N),
  by = .(id, condition)
]
pept_int_rep <- merge(
  run_per_condition,
  pept_int_rep,
  by = "condition"
)
pept_int_rep[, repPC := Repcount / countRepMax]
pept_id_in_a_cond <- unique(pept_int_rep[repPC >= 0.5, id])
pept_int[, Valid := as.integer(id %in% pept_id_in_a_cond)]

# Drop the temporaries so they do not linger in the report environment.
rm(pept_id_in_a_cond, pept_int_rep)


# Flag proteins as Valid (1L) when, in at least one condition, the number of
# non-imputed rows is at least 50% of that condition's design rows
# (countRepMax); every other id gets Valid = 0L.
prot_int_rep <- prot_int[
  Imputed == 0,
  .(Repcount = .N),
  by = .(id, condition)
]
prot_int_rep <- merge(
  run_per_condition,
  prot_int_rep,
  by = "condition"
)
prot_int_rep[, repPC := Repcount / countRepMax]
prot_id_in_a_cond <- unique(prot_int_rep[repPC >= 0.5, id])
prot_int[, Valid := as.integer(id %in% prot_id_in_a_cond)]

# Drop the temporaries so they do not linger in the report environment.
rm(prot_id_in_a_cond, prot_int_rep)
# Per peptide id: number of non-imputed rows divided by the number of design
# rows (num_files). Grouping by id already yields one row per id, so no
# additional de-duplication is needed.
dt <- pept_int[Imputed == 0, .(ReplicatePC = .N / num_files), by = .(id)]

# Upper edge of the shaded band: the largest number of peptides sharing one
# ReplicatePC value, plus 1% headroom.
ymax <- max(dt[, .N, by = .(ReplicatePC)][, N])
ymax <- 0.01 * ymax + ymax

# Histogram of measurement availability. The grey band marks x in
# [0.7, 1.05], i.e. peptides with at most 30% missing values, and the text
# annotation reports how many peptides fall in that range.
p <- ggplot(dt, aes(x = ReplicatePC)) +
  annotate(
    "rect",
    xmin = 0.7, xmax = 1.05, ymin = 0, ymax = ymax,
    alpha = 0.2
  ) +
  geom_histogram(
    # Bin width is 1 / num_files rounded to two decimals, but never below 0.1.
    binwidth = max(0.1, round(1 / num_files, 2)),
    fill = "skyblue2"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.2),
    panel.grid.major.y = element_blank(),
    panel.border = element_blank(),
    axis.ticks.y = element_blank()
  ) +
  scale_x_continuous(
    "Percentage of measurements per peptide",
    labels = scales::percent,
    limits = c(0, 1.15),
    breaks = seq(0, 1, 0.1)
  ) +
  annotate(
    "text",
    x = 0.5,
    y = 0.8 * ymax,
    label = str_c(
      "Number of peptides with\nmissing values <= 30%:\n ",
      dt[ReplicatePC >= 0.7, .N]
    )
  ) +
  ggtitle("Peptide measurements availability")

# Render as an interactive widget showing only the "toImage" mode-bar button.
# TRUE/FALSE are used instead of T/F, which are ordinary variables that can
# be reassigned elsewhere in the session.
ggplotly(p, tooltip = c("y")) %>%
  config(
    displayModeBar = TRUE,
    modeBarButtons = list(list("toImage")),
    displaylogo = FALSE
  )


# MassDynamics/lfq_processing documentation built on May 4, 2023, 11:20 p.m.