Nothing
## ----required packages, echo = FALSE, warning=FALSE, results="hide"-----------
suppressPackageStartupMessages({
library("BiocStyle")
library("DEP")
library("dplyr")
})
## ----install, eval = FALSE----------------------------------------------------
#
# if (!requireNamespace("BiocManager", quietly=TRUE))
# install.packages("BiocManager")
# BiocManager::install("DEP")
#
# library("DEP")
#
## ----run_app, eval = FALSE----------------------------------------------------
# # For LFQ analysis
# run_app("LFQ")
#
# # For TMT analysis
# run_app("TMT")
## ----load data----------------------------------------------------------------
# Loading a package required for data handling
library("dplyr")
# The data is provided with the package
data <- UbiLength
# We filter for contaminant proteins and decoy database hits, which are indicated by "+" in the columns "Potential.contaminants" and "Reverse", respectively.
data <- filter(data, Reverse != "+", Potential.contaminant != "+")
## ----dimension----------------------------------------------------------------
dim(data)
## ----colnames-----------------------------------------------------------------
colnames(data)
## ----unique-------------------------------------------------------------------
# Are there any duplicated gene names?
data$Gene.names %>% duplicated() %>% any()
# Make a table of duplicated gene names
data %>% group_by(Gene.names) %>% summarize(frequency = n()) %>%
arrange(desc(frequency)) %>% filter(frequency > 1)
## ----unique_names-------------------------------------------------------------
# Make unique names using the annotation in the "Gene.names" column as primary names and the annotation in "Protein.IDs" as name for those that do not have an gene name.
data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
# Are there any duplicated names?
data$name %>% duplicated() %>% any()
## ----expdesign, echo = FALSE--------------------------------------------------
# Display experimental design
knitr::kable(UbiLength_ExpDesign)
## ----to_exprset---------------------------------------------------------------
# Generate a SummarizedExperiment object using an experimental design
LFQ_columns <- grep("LFQ.", colnames(data_unique)) # get LFQ column numbers
experimental_design <- UbiLength_ExpDesign
data_se <- make_se(data_unique, LFQ_columns, experimental_design)
# Generate a SummarizedExperiment object by parsing condition information from the column names
LFQ_columns <- grep("LFQ.", colnames(data_unique)) # get LFQ column numbers
data_se_parsed <- make_se_parse(data_unique, LFQ_columns)
# Let's have a look at the SummarizedExperiment object
data_se
## ----plot_data_noFilt, fig.width = 4, fig.height = 4--------------------------
# Plot a barplot of the protein identification overlap between samples
plot_frequency(data_se)
## ----filter_missval-----------------------------------------------------------
# Filter for proteins that are identified in all replicates of at least one condition
data_filt <- filter_missval(data_se, thr = 0)
# Less stringent filtering:
# Filter for proteins that are identified in 2 out of 3 replicates of at least one condition
data_filt2 <- filter_missval(data_se, thr = 1)
## ----plot_data, fig.width = 4, fig.height = 4---------------------------------
# Plot a barplot of the number of identified proteins per samples
plot_numbers(data_filt)
## ----plot_data2, fig.width = 3, fig.height = 4--------------------------------
# Plot a barplot of the protein identification overlap between samples
plot_coverage(data_filt)
## ----normalize----------------------------------------------------------------
# Normalize the data
data_norm <- normalize_vsn(data_filt)
## ----plot_norm, fig.width = 4, fig.height = 5---------------------------------
# Visualize normalization by boxplots for all samples before and after normalization
plot_normalization(data_filt, data_norm)
## ----plot_missval, fig.height = 4, fig.width = 3------------------------------
# Plot a heatmap of proteins with missing values
plot_missval(data_filt)
## ----plot_detect, fig.height = 4, fig.width = 4-------------------------------
# Plot intensity distributions and cumulative fraction of proteins with and without missing values
plot_detect(data_filt)
## ----impute, results = "hide", message = FALSE, warning = FALSE, error = TRUE----
# All possible imputation methods are printed in an error, if an invalid function name is given.
impute(data_norm, fun = "")
# Impute missing data using random draws from a Gaussian distribution centered around a minimal value (for MNAR)
data_imp <- impute(data_norm, fun = "MinProb", q = 0.01)
# Impute missing data using random draws from a manually defined left-shifted Gaussian distribution (for MNAR)
data_imp_man <- impute(data_norm, fun = "man", shift = 1.8, scale = 0.3)
# Impute missing data using the k-nearest neighbour approach (for MAR)
data_imp_knn <- impute(data_norm, fun = "knn", rowmax = 0.9)
## ----plot_imp, fig.width = 4, fig.height = 4----------------------------------
# Plot intensity distributions before and after imputation
plot_imputation(data_norm, data_imp)
## ----statistics---------------------------------------------------------------
# Differential enrichment analysis based on linear models and empherical Bayes statistics
# Test every sample versus control
data_diff <- test_diff(data_imp, type = "control", control = "Ctrl")
# Test all possible comparisons of samples
data_diff_all_contrasts <- test_diff(data_imp, type = "all")
# Test manually defined comparisons
data_diff_manual <- test_diff(data_imp, type = "manual",
test = c("Ubi4_vs_Ctrl", "Ubi6_vs_Ctrl"))
## ----add_reject---------------------------------------------------------------
# Denote significant proteins based on user defined cutoffs
dep <- add_rejections(data_diff, alpha = 0.05, lfc = log2(1.5))
## ----pca, fig.height = 3, fig.width = 4---------------------------------------
# Plot the first and second principal components
plot_pca(dep, x = 1, y = 2, n = 500, point_size = 4)
## ----corr, fig.height = 3, fig.width = 5--------------------------------------
# Plot the Pearson correlation matrix
plot_cor(dep, significant = TRUE, lower = 0, upper = 1, pal = "Reds")
## ----heatmap, fig.height = 5, fig.width = 3-----------------------------------
# Plot a heatmap of all significant proteins with the data centered per protein
plot_heatmap(dep, type = "centered", kmeans = TRUE,
k = 6, col_limit = 4, show_row_names = FALSE,
indicate = c("condition", "replicate"))
## ----heatmap2, fig.height = 5, fig.width = 3----------------------------------
# Plot a heatmap of all significant proteins (rows) and the tested contrasts (columns)
plot_heatmap(dep, type = "contrast", kmeans = TRUE,
k = 6, col_limit = 10, show_row_names = FALSE)
## ----volcano, fig.height = 5, fig.width = 5-----------------------------------
# Plot a volcano plot for the contrast "Ubi6 vs Ctrl""
plot_volcano(dep, contrast = "Ubi6_vs_Ctrl", label_size = 2, add_names = TRUE)
## ----bar, fig.height = 3.5, fig.width = 3.5-----------------------------------
# Plot a barplot for USP15 and IKBKG
plot_single(dep, proteins = c("USP15", "IKBKG"))
# Plot a barplot for the protein USP15 with the data centered
plot_single(dep, proteins = "USP15", type = "centered")
## ----overlap, fig.height = 4, fig.width = 6-----------------------------------
# Plot a frequency plot of significant proteins for the different conditions
plot_cond(dep)
## ----results_table------------------------------------------------------------
# Generate a results table
data_results <- get_results(dep)
# Number of significant proteins
data_results %>% filter(significant) %>% nrow()
## ----results_table2-----------------------------------------------------------
# Column names of the results table
colnames(data_results)
## ----get_df-------------------------------------------------------------------
# Generate a wide data.frame
df_wide <- get_df_wide(dep)
# Generate a long data.frame
df_long <- get_df_long(dep)
## ----save_load, eval = FALSE--------------------------------------------------
# # Save analyzed data
# save(data_se, data_norm, data_imp, data_diff, dep, file = "data.RData")
# # These data can be loaded in future R sessions using this command
# load("data.RData")
## ----LFQ, message = FALSE-----------------------------------------------------
# The data is provided with the package
data <- UbiLength
experimental_design <- UbiLength_ExpDesign
# The wrapper function performs the full analysis
data_results <- LFQ(data, experimental_design, fun = "MinProb",
type = "control", control = "Ctrl", alpha = 0.05, lfc = 1)
## ----report2, eval = FALSE----------------------------------------------------
# # Make a markdown report and save the results
# report(data_results)
## ----report1------------------------------------------------------------------
# See all objects saved within the results object
names(data_results)
## ----LFQ_results3-------------------------------------------------------------
# Extract the results table
results_table <- data_results$results
# Number of significant proteins
results_table %>% filter(significant) %>% nrow()
## ----LFQ_results4, fig.height = 5, fig.width = 3------------------------------
# Extract the sign object
full_data <- data_results$dep
# Use the full data to generate a heatmap
plot_heatmap(full_data, type = "contrast", kmeans = TRUE,
k = 6, col_limit = 4, show_row_names = FALSE)
## ----TMT, eval = FALSE--------------------------------------------------------
# # Need example data
# TMTdata <- example_data
# Exp_Design <- example_Exp_Design
#
# # The wrapper function performs the full analysis
# TMTdata_results <- TMT(TMTdata, expdesign = Exp_Design, fun = "MinProb",
# type = "control", control = "Control", alpha = 0.05, lfc = 1)
## ----session_info, echo = FALSE-----------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.