# Chunk defaults for this report: no figure caption, and figures are written
# under the path supplied through the report parameter `output_figure`.
knitr::opts_chunk$set(
  fig.cap = NULL,
  fig.path = params$output_figure
)

library(knitr)
library(data.table)
library(ggplot2)
library(ggrepel)
library(GGally)
library(umap)
library(FactoMineR)
library(factoextra)
library(corrplot)
library(viridis)
library(ggpubr)
library(Hmisc)
library(plotly)
library(stringr)
library(bit64)
# Preconditions: these objects must already exist in the environment this
# script is evaluated in; nothing in this script creates them.
stopifnot(exists("expdes"), exists("prot"), exists("prot_int"))

# Keep only the experimental-design columns used by the report.
expdes <- expdes[, c("condition", "experiment", "reporter_channel", "replicate")]

# TODO: need to understand what ReplicatePC means for prot_int with TMT.
# num_files is simply the number of rows in the experimental design.
num_files <- nrow(expdes)
# Guard the division below: with zero design rows every ReplicatePC would be Inf.
stopifnot(num_files > 0)

# Per protein id: number of non-imputed rows divided by the number of design
# rows. Grouping by id already yields one row per id, so no unique() is needed.
# Ids whose rows are all imputed do not appear in dt at all.
dt <- prot_int[Imputed == 0, .(ReplicatePC = .N / num_files), by = .(id)]

# Largest number of proteins sharing one ReplicatePC value, plus 1% headroom;
# used as the top of the shaded band and to place the text label in the plot.
# The 0L floor keeps ymax finite (0) when dt is empty instead of -Inf + warning.
# NOTE(review): this counts per distinct ReplicatePC value, not per histogram
# bin; if several distinct values fall into one bin the bar can exceed ymax.
ymax <- max(0L, dt[, .N, by = .(ReplicatePC)][, N])
ymax <- 0.01 * ymax + ymax

# Histogram of per-protein measurement availability (ReplicatePC).
# Layers, in draw order: a shaded band over ReplicatePC 0.7 to 1.05, the
# histogram itself, and a text label with the number of proteins whose
# ReplicatePC is at least 0.7.
p <- ggplot(data = dt, mapping = aes(x = ReplicatePC)) +
  annotate(
    "rect",
    xmin = 0.7, xmax = 1.05,
    ymin = 0, ymax = ymax,
    alpha = 0.2
  ) +
  geom_histogram(
    # Bins are at least 0.1 wide; wider only when 1 / num_files exceeds that.
    binwidth = max(0.1, round(1 / num_files, 2)),
    fill = "skyblue2"
  ) +
  annotate(
    "text",
    x = 0.5, y = 0.8 * ymax,
    label = str_c(
      "Number of proteins with\nmissing values <= 30%:\n ",
      dt[ReplicatePC >= 0.7, .N]
    )
  ) +
  scale_x_continuous(
    name = "Percentage of measurements per protein",
    labels = scales::percent,
    limits = c(0, 1.15),
    breaks = seq(0, 1, 0.1)
  ) +
  labs(title = "Protein measurements availability") +
  theme_minimal()


# Convert the ggplot to an interactive plotly widget. Only the "y" aesthetic
# is shown in the hover tooltip. The mode bar stays visible but is reduced to
# the image-export button, and the plotly logo is hidden.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
ggplotly(p, tooltip = c("y")) %>%
  config(
    displayModeBar = TRUE,
    modeBarButtons = list(list("toImage")),
    displaylogo = FALSE
  )


# MassDynamics/lfq_processing documentation built on May 4, 2023, 11:20 p.m.