library(plotly)
library(stringr)
knitr::opts_chunk$set(echo = TRUE,
                      message = FALSE,
                      warning = FALSE,
                      collapse = TRUE,
                      comment = "#>", 
                      fig.width = 6, fig.height = 6)

Setup

library(devtools)
library(tibble)
devtools::install_github("MassExpression")
library(MassExpression)
utils::packageVersion("MassExpression")

Internal data available

data(package="MassExpression")

Run workflow with sample data

intensities <- mq_lfq_data$intensities
design <- mq_lfq_data$design
parameters <- mq_lfq_data$parameters
normalisation_method <- parameters[parameters[,1] == "UseNormalisationMethod",2]
species <- parameters[parameters[,1] == "Species",2]
labellingMethod <- parameters[parameters[,1] == "LabellingMethod",2]


results <- runGenericDiscovery(experimentDesign = design, 
                               proteinIntensities = intensities, 
                               normalisationMethod = normalisation_method, 
                               species = species, 
                               labellingMethod = labellingMethod)

IntensityExperiment <- results$IntensityExperiment
CompleteIntensityExperiment <-  results$CompleteIntensityExperiment
design <- colData(CompleteIntensityExperiment)

Plot results

1. Experiment Design and Summary of Results

design <- tibble::as_tibble(design)
design$SampleName <- make.names(design$SampleName)
design <- design %>% tidyr::unite(SampleNameInPlots, Condition, Replicate, 
                                                      sep="_", remove=FALSE) %>%
  dplyr::select(-Replicate, everything())

design <- design %>% dplyr::select(SampleName, Condition, 
                SampleNameInPlots, everything())


knitr::kable(design, row.names = FALSE) 

\clearpage

The table below reports the number of proteins (Number of Proteins column) considered for each pairwise comparison analysis (proteins with more than 50% missing values across samples are removed) and the number of DE proteins detected in each comparison (Number DE Proteins column). The overall total number of proteins included in the exeriment is r nrow(rowData(CompleteIntensityExperiment)).

stats <- tibble::as_tibble(rowData(CompleteIntensityExperiment))
pval_proteins <- stats %>%
  dplyr::select(ProteinId, starts_with("adj.P.Val"))

total_proteins_in_experiment <- nrow(pval_proteins)
total_proteins_de <- length(pval_proteins$adj.P.Val < 0.05)

n_proteins_by_cond <- pval_proteins
n_proteins_by_cond$adj.P.Val <- NULL

n_proteins_by_cond_long <- n_proteins_by_cond %>% tidyr::pivot_longer(cols = starts_with("adj.P.Val"), names_to = "Comparison",values_to = "AdjPval")
n_proteins_by_cond_long$Comparison <- gsub("adj.P.Val.", "", n_proteins_by_cond_long$Comparison)

summary_de <- n_proteins_by_cond_long %>% 
  group_by(Comparison) %>%
  summarise(`Number of Proteins` = sum(!is.na(AdjPval)),
            `Number DE Proteins` = sum(AdjPval < 0.05, na.rm = TRUE))

knitr::kable(summary_de, row.names = FALSE, caption="Summary of differential expression results across comparisons.") 

2. Experiment Health

A. Dimensionality reduction {.tabset .tabset-fade .tabset-pills}

## PCA
p=plot_chosen_pca_experiment(CompleteIntensityExperiment, assayName = "intensities", format = "html")
p[[1]]
p[[2]]
p=plot_chosen_pca_experiment(CompleteIntensityExperiment, assayName = "intensities", format = "html", auto_select_features = "de")
p[[1]]

B. Quantitative values CV distributions {.tabset .tabset-fade .tabset-pills}

p=plot_condition_cv_distribution(IntensityExperiment)
ggplotly(p[[1]]) %>% plotly::config(displayModeBar = T, 
                        modeBarButtons = list(list('toImage')),
                        displaylogo = F)
cv_data <- p[[2]]
cv_data <- tibble::as_tibble(cv_data) %>% group_by(Condition) %>%
  summarise(`Median CV %` = round(median(cv, na.rm = TRUE)*100))

knitr::kable(cv_data, row.names = FALSE)

C. Sample correlations

plot_samples_correlation_matrix(CompleteIntensityExperiment, assayName = "intensities")
plot_samples_correlation_matrix(CompleteIntensityExperiment,assayName = "intensities", onlyDEProteins = TRUE)

3. Feature completedness {.tabset .tabset-fade .tabset-pills}

Patterns of missing values

plot_missingness_heatmap(IntensityExperiment)

By sample

p <- plot_replicate_missingness(IntensityExperiment, assayName = "raw")
ggplotly(p, tooltip = c("y")) %>% plotly::config(displayModeBar = T, 
                                                  modeBarButtons = list(list('toImage')),
                                                  displaylogo = F)

By protein

p <- plot_protein_missingness(IntensityExperiment, assayName="raw")

ggplotly(p, tooltip = c("y")) %>% plotly::config(displayModeBar = T, 
                                                  modeBarButtons = list(list('toImage')),
                                                  displaylogo = F)

4. Identifications {.tabset .tabset-fade .tabset-pills}

Counts

plot_n_identified_proteins_by_replicate(IntensityExperiment)

Consistent across conditions

plot_consistent_proteins_by_replicate(IntensityExperiment)

5. Normalisation and Imputation

A. Intensities distribution (before and after normalisation) {.tabset .tabset-fade .tabset-pills}

Raw intensities

normalised <- FALSE
if(metadata(CompleteIntensityExperiment)$NormalisationAppliedToAssay != "None"){
  normalised <- TRUE
}

# Plot RLE of log2 raw intensity as well as RLE of normalised 
p_raw <- plot_log_measurement_boxplot(IntensityExperiment, title = "log2 Raw Intensities", format = "html")
p_raw 

RLE (median centered protein intensities)

normalised <- FALSE
if(metadata(CompleteIntensityExperiment)$NormalisationAppliedToAssay != "None"){
  normalised <- TRUE
}

# Plot RLE of log2 raw intensity as well as RLE of normalised 
p_raw <- plot_rle_boxplot(IntensityExperiment, CompleteIntensityExperiment, 
                    includeImputed = FALSE, plotRawRLE = TRUE, 
                    title = "RLE of log2 Raw Intensities", 
                    format = "html")
p_raw 
if(normalised){
  # Plot RLE of log2 raw intensity as well as RLE of normalised 
  p_norm <- plot_rle_boxplot(IntensityExperiment, CompleteIntensityExperiment, 
                      includeImputed = FALSE, plotRawRLE = FALSE)
  p_norm <- p_norm + ggtitle("RLE of Normalised log2 Intensities") 

  p_norm
}

B. Imputed vs actual intensities {.tabset .tabset-fade .tabset-pills}

plot_imputed_vs_not(CompleteIntensityExperiment = CompleteIntensityExperiment)

5. Model QC

Volcano plots

stats <- rowData(CompleteIntensityExperiment)
logfcs <- colnames(stats)[stringr::str_detect(colnames(stats), "logFC.")]
for(lfc in logfcs){
  comp_name <- stringr::str_remove(lfc, "logFC.")
  adj <- paste0("adj.P.Val.",comp_name)
  adjp <- colnames(stats)[stringr::str_detect(colnames(stats), adj)]
  pval <- paste0("P.Value.",comp_name)
  pval_col <- colnames(stats)[str_detect(colnames(stats), pval)]
  comparison.statistics <- data.frame(FoldChange=stats[,lfc],
                                      AdjustedPValue=stats[,adjp], 
                                      PValue = stats[,pval_col],
                                      ProteinId=stats$ProteinId, comparison=comp_name)
  print(plot_volcano(comparison.statistics))

}

MA plot

for(lfc in logfcs){
  comp_name <- str_remove(lfc, "logFC.")
  comparison.statistics <- data.frame(FoldChange=stats[,lfc],
                                      AveExpr=stats$AveExpr, 
                                      ProteinId=stats$ProteinId, comparison=comp_name)
  print(plot_ma(comparison.statistics))

}

Histogram of pvalues

pvals <- colnames(stats)[str_detect(colnames(stats), "P.Value.")]
for(pval in pvals){
  comp_name <- str_remove(pval, "P.Value.")
  print(hist(stats[,pval], main=comp_name))

}

6. Session Information

sessionInfo()


MassDynamics/MassExpression documentation built on May 7, 2023, 11:29 a.m.