knitr::opts_chunk$set(echo = TRUE, eval = FALSE) reRunFitting <- FALSE
This vignette illustrates the analysis of the phosphoTPP dataset by @potel_2020 and compares it to the one by @huang_2019.
The supplementary tables 41592_2019_499_MOESM3_ESM.xlsx
and 41592_2019_499_MOESM4_ESM.xlsx
by @huang_2019 can be downloaded from the journal website: https://www.nature.com/articles/s41592-019-0499-3#Sec26 .
The supplementary tables 56035_2_data_set_478471_pxb8gk.xlsx
and 56035_2_data_set_478475_pxb8l1-tableS3.xlsx
by @ochoa_2020 can also be downloaded from the respective journal website: https://www.nature.com/articles/s41587-019-0344-3#Sec30 .
The processed data by @potel_2020 and reprocessed data by @huang_2019 can be downloaded from Mendeley: https://data.mendeley.com/datasets/4vwzvxfcnd/3
The table 1-s2.0-S0092867418303854-mmc4.xlsx
(used for supplementary figures comparing different TPP-TR experiments) can be downloaded from the respective journal website: https://www.sciencedirect.com/science/article/pii/S0092867418303854#app2
All tables should be saved in the dat
folder of the package.
Install the phosphoTPP
package from github
if (!requireNamespace("devtools", quietly = TRUE)) install.packages("devtools") devtools::install_github("nkurzaw/phosphoTPP")
We then load the required libraries
library(phosphoTPP) library(dplyr) library(readr) library(tidyr) library(readxl) library(GGally) library(ggpmisc)
Set some plotting options
# set plotting sizes anno_size <- 2 pval_size <- 2 rsq_size <- 2 # define plotting theme theme_paper <- theme(axis.title = element_text(size = 8), axis.text = element_text(size = 6), legend.text = element_text(size = 6), title = element_text(size = 9))
And we set the thresholds for filtering for high-quality peptide identifications and quantifications and the filtering criteria for melting curves.
# set thresholds for quality filtering s2i_thres <- 0.5 p2t_thres <- 4 mascot_score_thres <- 20 fdr_at_score_thres <- 0.01 r2_thres <- 0.8 plateau_thres <- 0.2
Load the config table which tells us which TMT channels were used for which temperature treatment
cfg_tab <- readxl::read_xlsx(file.path("../dat", "TPP-TR_config_phospho.xlsx")) cfg_tab
We then create the temperature annotation data frames for the phosphopeptide and unmodified samples
temperature_anno <- cfg_tab %>% dplyr::select(matches('[0-9]{3}')) %>% gather(key, temperature) %>% mutate(channel = paste("sig", key, sep = "_")) %>% mutate(fc_channel = paste("rel_fc", key, sep = "_")) temperature_anno_prot <- cfg_tab %>% dplyr::select(matches('[0-9]{3}')) %>% gather(key, temperature) %>% mutate(channel = paste("signal_sum", key, sep = "_")) %>% mutate(fc_channel = paste("rel_fc", key, sep = "_"))
We now identify the different raw input files
# read in lists of phospho and nbt protein and peptides tables per replicate phospho_protein_files <- list.files("../dat", pattern = 'phospho.+protein', full.names = TRUE) phospho_protein_files
nbf_protein_files <- list.files("../dat", pattern = 'NBF.+protein', full.names = TRUE) nbf_protein_files
phospho_peptide_files <- list.files("../dat", pattern = 'phospho.+peptide.+txt', full.names = TRUE) phospho_peptide_files
phosho_mxq_peptide_files <- list.files("../dat", pattern = 'pTPP.+txt', full.names = TRUE) phosho_mxq_peptide_files
nbf_peptide_files <- list.files("../dat", pattern = 'NBF.+peptide', full.names = TRUE) nbf_peptide_files
Then, we read in the individual files into a list of replicate data for the respective data types ('phosphoproteins', unmodified proteins, phosphopeptides, unmodified peptides, etc.)
# read data files into lists of replicates phospho_protein_tabs <- lapply(seq_len(length(phospho_protein_files)), function(file_i){ read_delim(phospho_protein_files[file_i], delim = "\t") %>% mutate(replicate = file_i) }) nbf_protein_tabs <- lapply(seq_len(length(nbf_protein_files)), function(file_i){ read_delim(nbf_protein_files[file_i], delim = "\t") %>% mutate(replicate = file_i) }) phospho_peptide_tabs <- lapply(seq_len(length(phospho_peptide_files)), function(file_i){ ibq_tab <- read_delim(phospho_peptide_files[file_i], delim = "\t") %>% filter_at(vars(starts_with("sig_")), all_vars(!is.na(.))) mxq_tab <- read_delim(phosho_mxq_peptide_files[file_i], delim = "\t") combined_tab <- left_join( ibq_tab %>% dplyr::select(protein_id, sequence, modifications, matches("sig_"), s2i, p2t, msms_id), mxq_tab %>% dplyr::select(protein_id_mxq = Proteins, sequence = Sequence, msms_id = `MS/MS scan number`, mod_sequence = `Modified sequence`, phospho_prob = `Phospho (STY) Probabilities`, phospho_score_diff = `Phospho (STY) Score Diffs`, phospho_site_STY = `Phospho (STY)`), by = c("sequence", "msms_id")) %>% filter(!is.na(protein_id_mxq)) %>% group_by(sequence, msms_id) %>% filter(!duplicated(mod_sequence)) %>% ungroup %>% filter(getProb(phospho_prob)) %>% filter(p2t >= p2t_thres, s2i >= s2i_thres) %>% mutate(replicate = file_i) %>% mutate(rel_fc_126 = sig_126 / sig_126, rel_fc_127L = sig_127L / sig_126, rel_fc_127H = sig_127H / sig_126, rel_fc_128L = sig_128L / sig_126, rel_fc_128H = sig_128H / sig_126, rel_fc_129L = sig_129L / sig_126, rel_fc_129H = sig_129H / sig_126, rel_fc_130L = sig_130L / sig_126, rel_fc_130H = sig_130H / sig_126, rel_fc_131L = sig_131L / sig_126) return(combined_tab) }) phospho_non_phospho_peptide_tabs <- lapply(seq_len(length(phospho_peptide_files)), function(file_i){ read_delim(phospho_peptide_files[file_i], delim = "\t") %>% filter(is_decoy == 0, score > mascot_score_thres & is_unique == 1, rank == 1, fdr_at_score < fdr_at_score_thres, !is.na(modifications), grepl("TMT", modifications), !grepl("Phospho", modifications), in_quantification_of_protein == 1, !grepl("##", protein_id)) %>% filter_at(vars(starts_with("sig_")), all_vars(!is.na(.))) %>% mutate(replicate = file_i) %>% mutate(rel_fc_126 = sig_126 / sig_126, rel_fc_127L = sig_127L / sig_126, rel_fc_127H = sig_127H / sig_126, rel_fc_128L = sig_128L / sig_126, rel_fc_128H = sig_128H / sig_126, rel_fc_129L = sig_129L / sig_126, rel_fc_129H = sig_129H / sig_126, rel_fc_130L = sig_130L / sig_126, rel_fc_130H = sig_130H / sig_126, rel_fc_131L = sig_131L / sig_126) }) nbf_peptide_tabs <- lapply(seq_len(length(nbf_peptide_files)), function(file_i){ read_delim(nbf_peptide_files[file_i], delim = "\t") %>% filter(is_decoy == 0, score > mascot_score_thres & is_unique == 1, rank == 1, fdr_at_score < fdr_at_score_thres, !is.na(modifications), grepl("TMT", modifications), in_quantification_of_protein == 1, !grepl("##", protein_id)) %>% filter_at(vars(starts_with("sig_")), all_vars(!is.na(.))) %>% mutate(replicate = file_i) %>% mutate(rel_fc_126 = sig_126 / sig_126, rel_fc_127L = sig_127L / sig_126, rel_fc_127H = sig_127H / sig_126, rel_fc_128L = sig_128L / sig_126, rel_fc_128H = sig_128H / sig_126, rel_fc_129L = sig_129L / sig_126, rel_fc_129H = sig_129H / sig_126, rel_fc_130L = sig_130L / sig_126, rel_fc_130H = sig_130H / sig_126, rel_fc_131L = sig_131L / sig_126) })
phospho_protein_tabs <- readRDS("../prerun/phospho_protein_tabs.rds") nbf_protein_tabs <- readRDS("../prerun/nbf_protein_tabs.rds") phospho_peptide_tabs <- readRDS("../prerun/phospho_peptide_tabs.rds") phospho_non_phospho_peptide_tabs <- readRDS("../prerun/phospho_non_phospho_peptide_tabs.rds") nbf_peptide_tabs <- readRDS("../prerun/nbf_peptide_tabs.rds")
Next, we compute our first level normalization factors which normalize all peptides based on non-phosphorylated peptides found in matching replicates of phospho-enriched and non-bound fraction samples.
n_rep <- min(length(phospho_peptide_files), length(nbf_peptide_files)) per_exp_norm_factors <- lapply(seq_len(n_rep), function(rep_i){ norm_in_df <- bind_rows(nbf_peptide_tabs[[rep_i]] %>% mutate(dataset_id = paste("nbf", replicate, sep = "_")), phospho_non_phospho_peptide_tabs[[rep_i]] %>% mutate(dataset_id = paste("phospho", replicate, sep = "_"))) nbf_peptide_norm_factors <- getNormFactors4Data( pep_tab = norm_in_df, temperature_anno = temperature_anno, filter_criteria = list("rel_fc_127H_low" = 0, "rel_fc_129H_low" = 0.4, "rel_fc_129H_high" = 0.6, "rel_fc_130H_low" = 0, "rel_fc_130H_high" = 0.3, "rel_fc_131L_low" = 0, "rel_fc_131L_high" = 0.2), n_rep = 2) median_norm_factors <- nbf_peptide_norm_factors out_df <- tibble( temperature = nbf_peptide_norm_factors[[1]]$temperature, median_norm_factor = nbf_peptide_norm_factors[[1]]$median_rel_value / nbf_peptide_norm_factors[[2]]$median_rel_value) return(out_df) }) per_exp_norm_factors
Then, we retrieve our second level of normalization factors which perform the normalization described by @savitski_2014 (originally done on protein level) which normalize peptide fold changes in the different temperature channels towards the melting curve of median values retrieved from high-quality melting peptides.
norm_in_df <- bind_rows(nbf_peptide_tabs) %>% mutate(dataset_id = paste("nbf", replicate, sep = "_"))#, nbf_peptide_norm_factors <- getNormFactors4Data( pep_tab = norm_in_df, temperature_anno = temperature_anno, filter_criteria = list("rel_fc_127H_low" = 1, "rel_fc_129H_low" = 0.4, "rel_fc_129H_high" = 0.6, "rel_fc_130H_low" = 0, "rel_fc_130H_high" = 0.3, "rel_fc_131L_low" = 0, "rel_fc_131L_high" = 0.2), n_rep = n_rep) nbf_peptide_norm_factors
Now, we apply both normalization factors and visualize the melting curves for our phosphopeptides and non-bound fraction peptides.
nbf_peptide_filtered <- bind_rows(lapply(seq_len(length(nbf_peptide_tabs)), function(tab_i){ nbf_peptide_tabs[[tab_i]] %>% dplyr::select(protein_id, sequence, modifications, matches("rel_fc_"), replicate, score) %>% gather(key, value, -protein_id, -sequence, -modifications, -replicate, -score) %>% group_by(protein_id, sequence, replicate, modifications, key) %>% filter(score == max(score)) %>% left_join(temperature_anno, by = c("key" = "fc_channel")) %>% left_join(nbf_peptide_norm_factors[[tab_i]], by = "temperature") %>% mutate(rel_value = value * norm_factor) %>% left_join(nbf_protein_tabs[[tab_i]] %>% dplyr::select(protein_id, gene_name), by = c("protein_id")) })) phospho_peptide_tab_filtered <- bind_rows(lapply(seq_len(length(phospho_peptide_tabs)), function(tab_i){ phospho_peptide_tabs[[tab_i]] %>% dplyr::select(-matches("sig_")) %>% gather(key, value, matches("rel_fc_")) %>% group_by(protein_id, sequence, mod_sequence, phospho_site_STY, replicate, key) %>% summarise(value = mean(value, na.rm = TRUE), mean_s2i = mean(s2i, na.rm = TRUE), mean_p2t = mean(p2t, na.rm = TRUE)) %>% ungroup %>% left_join(temperature_anno %>% dplyr::select(-key), by = c("key" = "fc_channel")) %>% left_join(per_exp_norm_factors[[tab_i]], by = "temperature") %>% left_join(nbf_peptide_norm_factors[[tab_i]], by = "temperature") %>% mutate(rel_value = value * median_norm_factor * norm_factor) %>% left_join(phospho_protein_tabs[[tab_i]] %>% dplyr::select(protein_id, gene_name), by = c("protein_id")) %>% filter(!grepl("##", protein_id)) })) p_qc1 <- nbf_peptide_filtered %>% ggplot(aes(temperature, rel_value)) + geom_boxplot(aes(group = temperature)) + facet_wrap(~replicate) + coord_cartesian(ylim = c(0, 2.5)) p_qc2 <- phospho_peptide_tab_filtered %>% ggplot(aes(temperature, rel_value)) + geom_boxplot(aes(group = temperature)) + facet_wrap(~replicate) + coord_cartesian(ylim = c(0, 2.5)) cowplot::plot_grid(p_qc1 + ggtitle("nbf"), p_qc2 + ggtitle("phospho"), ncol = 2)
We then fit sigmoids to the melting profiles of the non-bound fractions peptides
# fit curves to nbf dataset out_nbf_df <- nbf_peptide_filtered %>% group_by(gene_name, replicate) %>% do({ suppressWarnings(fitMeltcurveModelAndEval(df = .)) }) %>% ungroup
out_nbf_df <- readRDS("../prerun/out_nbf_df.rds")
We then filter for hiqh quality fit according to the criteria defined by @savitski_2014 and inspect reproducibility by visualization of melting point scatter plots between the different replicates
# filter for high quality fits nbf_tm_rep_df <- out_nbf_df %>% filter(rSq > r2_thres, plateau < plateau_thres) %>% dplyr::select(gene_name, replicate, meltPoint) %>% spread(replicate, meltPoint) pm <- GGally::ggpairs(nbf_tm_rep_df, 2:6, upper = "blank", diag = NULL, lower = list(continuous = GGally::wrap(lowerFn))) GGally::ggmatrix(list(pm[2, 1], NULL, NULL, NULL, pm[3, 1], pm[3,2], NULL, NULL, pm[4, 1], pm[4,2], pm[4,3], NULL, pm[5, 1], pm[5,2], pm[5,3], pm[5,4]), 4, 4, xAxisLabels = c("Tm rep1", "Tm rep2", "Tm rep3", "Tm rep4"), yAxisLabels = c("Tm rep2", "Tm rep3","Tm rep4","Tm rep5"))
Next, we fit sigmoids to the melting profiles of the phosphopeptides
# fit phospho peptide melting curves out_phospho_df <- phospho_peptide_tab_filtered %>% dplyr::select(protein_id, gene_name, sequence, mod_sequence, mean_s2i, mean_p2t, phospho_site_STY, replicate, rel_value, temperature) %>% group_by(protein_id, gene_name, sequence, mod_sequence, mean_s2i, mean_p2t, phospho_site_STY, replicate) %>% do({ suppressWarnings(fitMeltcurveModelAndEval(df = .)) }) %>% ungroup
out_phospho_df <- readRDS("../prerun/out_phospho_df.rds")
And again we inspect reproducibility by visualizing scatter plots of meling points estimated in the different replicates
phospho_tm_rep_df <- out_phospho_df %>% filter(rSq > r2_thres, plateau < plateau_thres) %>% dplyr::select(gene_name, sequence, mod_sequence, phospho_site_STY, replicate, meltPoint) %>% spread(replicate, meltPoint) pm_phospho <- ggpairs(phospho_tm_rep_df, 5:9, upper = "blank", diag = NULL, lower = list(continuous = wrap(lowerFn))) ggmatrix(list(pm_phospho[2, 1], NULL, NULL, NULL, pm_phospho[3, 1], pm_phospho[3,2], NULL, NULL, pm_phospho[4, 1], pm_phospho[4,2], pm_phospho[4,3], NULL, pm_phospho[5, 1], pm_phospho[5,2], pm_phospho[5,3], pm_phospho[5,4]), 4, 4, xAxisLabels = c("Tm rep1", "Tm rep2", "Tm rep3", "Tm rep4"), yAxisLabels = c("Tm rep2", "Tm rep3","Tm rep4","Tm rep5"))
Now we join our results of the melting curve fitting for phosphopeptides and unmodified proteins and apply the test for differential melting points established by @savitski_2014
nbf_tm_rep_fil <- out_nbf_df %>% filter(rSq > r2_thres, plateau < plateau_thres) %>% group_by(gene_name) %>% filter(n() > 2) %>% ungroup %>% dplyr::select(gene_name, replicate, meltPoint) %>% spread(replicate, meltPoint) phospho_tm_rep_fil <- out_phospho_df %>% filter(rSq > r2_thres, plateau < plateau_thres) %>% group_by(gene_name, sequence, mod_sequence, phospho_site_STY) %>% filter(n() > 2) %>% ungroup() %>% dplyr::select(gene_name, sequence, mod_sequence, phospho_site_STY, replicate, meltPoint) %>% spread(replicate, meltPoint) combo_tm_df <- left_join(phospho_tm_rep_fil, nbf_tm_rep_fil, by = "gene_name") %>% rowwise() %>% mutate(id = paste(gene_name, mod_sequence, phospho_site_STY, sep = "_")) %>% ungroup() combo_mean_tm_significant_df <- combo_tm_df %>% rowwise() %>% mutate(mean_phospho = mean(c(`1.x`, `2.x`, `3.x`, `4.x`, `5.x`), na.rm = TRUE), mean_nbf = mean(c(`1.y`, `2.y`, `3.y`, `4.y`, `5.y`), na.rm = TRUE)) %>% ungroup() %>% rowwise() %>% mutate(delta_meltPoint_1 = `1.x` - `1.y`, delta_meltPoint_2 = `2.x` - `2.y`, delta_meltPoint_3 = `3.x` - `3.y`, delta_meltPoint_4 = `4.x` - `4.y`, delta_meltPoint_5 = `5.x` - `5.y`) %>% ungroup() %>% mutate(delta_meltPoint_1_scaled = scale(delta_meltPoint_1), delta_meltPoint_2_scaled = scale(delta_meltPoint_2), delta_meltPoint_3_scaled = scale(delta_meltPoint_3), delta_meltPoint_4_scaled = scale(delta_meltPoint_4), delta_meltPoint_5_scaled = scale(delta_meltPoint_5)) %>% mutate(p_value_1 = 1 - pnorm(abs(delta_meltPoint_1_scaled)), p_value_2 = 1 - pnorm(abs(delta_meltPoint_2_scaled)), p_value_3 = 1 - pnorm(abs(delta_meltPoint_3_scaled)), p_value_4 = 1 - pnorm(abs(delta_meltPoint_4_scaled)), p_value_5 = 1 - pnorm(abs(delta_meltPoint_5_scaled))) %>% ungroup() %>% mutate(p_adj_1 = p.adjust(p_value_1, method = "BH"), p_adj_2 = p.adjust(p_value_2, method = "BH"), p_adj_3 = p.adjust(p_value_3, method = "BH"), p_adj_4 = p.adjust(p_value_4, method = "BH"), p_adj_5 = p.adjust(p_value_5, method = "BH")) %>% rowwise() %>% mutate(significant = countAtLeast(x = c(p_adj_1, p_adj_2, p_adj_3, p_adj_4, p_adj_5), val_with_sign = c(delta_meltPoint_1_scaled, delta_meltPoint_2_scaled, delta_meltPoint_3_scaled, delta_meltPoint_4_scaled, delta_meltPoint_5_scaled), min_n = 2)) %>% ungroup()
To maximize overlap with @huang_2019 , we here choose gene names overlapping their data in case there are ambiguous gene names
## load Huang et al. phospho dataset huang_phospho_tab <- readxl::read_xlsx(file.path("../dat", "41592_2019_499_MOESM4_ESM.xlsx"), skip = 2) colnames(huang_phospho_tab)[15] <- "AveragePhosphoTm" colnames(huang_phospho_tab)[21] <- "deltaTmPval" colnames(huang_phospho_tab)[22] <- "HighQuality" huang_phospho_hq <- filter( huang_phospho_tab, HighQuality == "Yes") combo_mean_tm_fcs_choosen_gene_names <- chooseCoherentGeneNames(in_df = combo_mean_tm_significant_df, their_data = huang_phospho_tab)
Then, we annotate the phosphorylation sites found by mapping back the modified sequence to protein sequences obtained from Uniprot
uniprot_seq_df <- read_csv("../dat/uniprot_seq_df.csv") combo_mean_tm_pSite_anno <- left_join( combo_mean_tm_fcs_choosen_gene_names, uniprot_seq_df, by = "gene_name") %>% rowwise() %>% mutate(Gene_pSite = getPhosphoSiteId( gene_name = gene_name, string = mod_sequence, seq = seq)) %>% ungroup %>% mutate(significant_huang = Gene_pSite %in% filter(huang_phospho_hq, as.numeric(deltaTmPval) < 0.05)$Gene_pSite) p_1b <- ggplot(combo_mean_tm_pSite_anno, aes(mean_nbf, mean_phospho)) + geom_point(shape = 16, alpha = 0.2, size = 0.5) + geom_line(aes(x,y), data = tibble(x = 40:65, y = 40:65), color = "darkgray", size = 0.25) + geom_point(data = filter(combo_mean_tm_pSite_anno, significant), color = "#b2182b", shape = 16, size = 0.5) + geom_point(data = filter(combo_mean_tm_pSite_anno, significant_huang), color = "#1f78b4", shape = 16, size = 0.5) + geom_point(data = filter(combo_mean_tm_pSite_anno, significant_huang, significant), color = "#ff7f00", shape = 16, size = 0.5) + coord_cartesian(xlim = c(40, 65), ylim = c(40, 65)) + ggpmisc::stat_poly_eq(formula = y ~ x, parse = TRUE, size = rsq_size) + labs(x = expression('T'[m]^unmodified* ' '* '('*~degree*C*')'), y = expression('T'[m]^phospho* ' '* '('*~degree*C*')')) + theme_bw() + theme_paper
Now, we load the data on machine learning-derived functional scores for phosphosites by @ochoa_2020 and join them to our dataset
# annotate ochoa et al. functional scores ochoa_et_al_anno <- readxl::read_xlsx(file.path("../dat", "56035_2_data_set_478471_pxb8gk.xlsx")) ochoa_et_al_mapping <- read_delim(file.path("../dat", "ochoa_mapping.txt"), delim = "\t") ochoa_et_al_func_anno <- readxl::read_xlsx(file.path("../dat", "56035_2_data_set_478475_pxb8l1-tableS3.xlsx")) ochoa_et_al_func_anno_psite <- left_join( ochoa_et_al_func_anno, ochoa_et_al_anno, by = c("uniprot", "position") ) ochoa_et_al_anno_fil <- left_join( ochoa_et_al_func_anno_psite %>% dplyr::select(uniprot, position, residue, functional_score, is_disopred, isHotspot, isInterface), ochoa_et_al_mapping %>% dplyr::select(uniprot = From, gene_name = To), by = "uniprot") %>% mutate(Gene_pSite = paste0(gene_name, "_", "p", residue, position)) combo_mean_tm_pSite_anno_functional_scores <- left_join( combo_mean_tm_pSite_anno %>% rowwise() %>% mutate(mean_delta_meltPoint = mean(c(delta_meltPoint_1, delta_meltPoint_2, delta_meltPoint_3, delta_meltPoint_4, delta_meltPoint_5), na.rm = TRUE)) %>% ungroup, ochoa_et_al_anno_fil %>% na.omit %>% dplyr::select(Gene_pSite, functional_score, is_disopred, isHotspot, isInterface), by = "Gene_pSite")
We join the functional scores also to the data by @huang_2019
huang_raw_df <- readxl::read_xlsx(file.path("../dat", "41592_2019_499_MOESM3_ESM.xlsx"), skip = 2) colnames(huang_raw_df)[14] <- "AverageBulkTm" huang_bulk <- huang_raw_df %>% dplyr::select(uniprot_id = `Uniprot Accession`, average_tm_nbf = AverageBulkTm) %>% mutate(average_tm_nbf = as.numeric(average_tm_nbf)) huang_phospho_hq_potel_anno <- huang_phospho_hq %>% dplyr::select(uniprot_id = `Uniprot Accession`, Gene_pSite, average_tm_phospho = AveragePhosphoTm, p_value = deltaTmPval) %>% mutate(significant_potel = Gene_pSite %in% filter(combo_mean_tm_pSite_anno_functional_scores, significant)$Gene_pSite) %>% mutate(average_tm_phospho = as.numeric(average_tm_phospho), p_value = as.numeric(p_value)) %>% left_join(huang_bulk, by = "uniprot_id") p_1a <- ggplot(huang_phospho_hq_potel_anno, aes(average_tm_nbf, average_tm_phospho)) + geom_point(shape = 16, alpha = 0.2, size = 0.5) + geom_line(aes(x,y), data = tibble(x = 40:65, y = 40:65), color = "darkgray", size = 0.25) + geom_point(data = filter(huang_phospho_hq_potel_anno, p_value < 0.05), color = "#1f78b4", size = 0.5, shape = 16) + geom_point(data = filter(huang_phospho_hq_potel_anno, significant_potel), color = "#b2182b", size = 0.5, shape = 16) + geom_point(data = filter(huang_phospho_hq_potel_anno, p_value < 0.05, significant_potel), color = "#ff7f00", size = 0.5, shape = 16) + coord_cartesian(xlim = c(40, 65), ylim = c(40, 65)) + ggpmisc::stat_poly_eq(formula = y ~ x, parse = TRUE, size = rsq_size) + labs(x = expression('T'[m]^unmodified*' '* '('*~degree*C*')'), y = expression('T'[m]^phospho*' '* '('*~degree*C*')')) + theme_bw() + theme_paper
And now we visualize the distributions of functional scores for the significantly vs. non-significantly differently thermally shifted phosphosites for the results by @huang_2019 and @potel_2020
# ochoa functional score on Huang et al data huang_phospho_hq_functional_score <- left_join( huang_phospho_hq %>% dplyr::select(Gene_pSite, average_deltaTm = AveragePhosphoTm, p_value = deltaTmPval) %>% mutate(average_deltaTm = as.numeric(average_deltaTm), p_value = as.numeric(p_value)) %>% mutate(significant = (p_value < 0.05)), ochoa_et_al_anno_fil %>% na.omit %>% dplyr::select(Gene_pSite, functional_score), by = "Gene_pSite") # combine both datasets to save space for plot func_score_combo_df <- bind_rows( filter(combo_mean_tm_pSite_anno_functional_scores, !is.na(functional_score)) %>% dplyr::select(Gene_pSite, significant, functional_score) %>% mutate(group = "This study"), filter(huang_phospho_hq_functional_score, !is.na(functional_score)) %>% dplyr::select(Gene_pSite, significant, functional_score)%>% mutate(group = "Huang et al.") ) p_1c <- ggplot(func_score_combo_df , aes(significant, functional_score)) + geom_violin(aes(fill = significant), alpha = 0.75, color = NA) + geom_boxplot(outlier.shape = NA, width = 0.25, alpha = 0.5) + ggsignif::geom_signif(comparisons = list(c("TRUE", "FALSE")), textsize = anno_size, size = 0.25) + geom_text(stat = "count", aes(label = ..count..), y = 0.01, size = anno_size) + scale_fill_manual(values = c("gray10", "gray70")) + facet_wrap(~ group) + labs(x = "found significantly de/stabilized", y = "functional score by Ochoa et al.") + coord_cartesian(ylim = c(-0.1, 1.1)) + theme_bw() + theme(legend.position = "none", strip.text = element_text(size = 8)) + theme_paper
We now plot examples melting curves showing significantly altered melting profiles in the phosphopeptide data compared to the unmodified proteins
# examples from this study p_1d <- plotDiffPhosphoMeltcurve( g_name = "LYN", p_seq = "_VIEDNEpYTAR_", nbf_df = nbf_peptide_filtered, phospho_df = phospho_peptide_tab_filtered, Gene_pSite = "LYN_pY397") + theme_paper p_1e <- plotDiffPhosphoMeltcurve( g_name = "LMNA", p_seq = "_SGAQASSpTPLpSPTR_", nbf_df = nbf_peptide_filtered, phospho_df = phospho_peptide_tab_filtered, Gene_pSite = "LMNA_pT19pS22") + theme_paper p_1f <- plotDiffPhosphoMeltcurve( g_name = "CARHSP1", p_seq = "_GNVVPpSPLPTRR_", nbf_df = nbf_peptide_filtered, phospho_df = phospho_peptide_tab_filtered, Gene_pSite = "CARHSP1_pS41") + theme_paper
Finally, we combine all plots created above and plot them together
# assemble full figure cowplot::plot_grid( cowplot::plot_grid(p_1a, p_1b, p_1c, labels = c("a", "b", "c"), ncol = 3, rel_widths = c(0.3, 0.3, 0.3)), cowplot::plot_grid(p_1d, p_1e, p_1f, labels = c("d", "e", "f"), ncol = 3, rel_widths = c(0.3, 0.3, 0.3)), ncol = 1 )
supp1_tab <- phospho_peptide_tab_filtered %>% dplyr::select(gene_name, protein_id, sequence, mod_sequence, phospho_site_STY, key, replicate, temperature, rel_value) %>% unite(combo_var, c(key, replicate, temperature), sep = "_") %>% spread(combo_var, rel_value) write_csv(supp1_tab, path = "phosphoTPP-supplementary_data1-supplementary_table_phospho_peptide_fold_changes.csv") supp2_tab <- nbf_peptide_filtered %>% ungroup %>% dplyr::select(gene_name, protein_id, sequence, modifications, key, replicate, temperature, rel_value) %>% unite(combo_var, c(key, replicate, temperature), sep = "_") %>% group_by(gene_name, protein_id, sequence, modifications, combo_var) %>% summarize(rel_value = mean(rel_value, na.rm = TRUE)) %>% ungroup %>% spread(combo_var, rel_value) write_csv(supp2_tab, path = "phosphoTPP-supplementary_data2-supplementary_table_non-modified_peptide_fold_changes.csv") phospho_median_fc_df <- chooseCoherentGeneNames( phospho_peptide_tab_filtered, combo_mean_tm_pSite_anno_functional_scores) %>% group_by(gene_name, mod_sequence, key, temperature) %>% summarize(median_rel_value = median(rel_value, na.rm = TRUE)) %>% ungroup %>% unite(combo_var, c(key, temperature), sep = "_") %>% spread(combo_var, median_rel_value) %>% dplyr::select(gene_name, mod_sequence, phospho_median_fc_37 = rel_fc_126_37, phospho_median_fc_40.4 = rel_fc_127L_40.4, phospho_median_fc_44 = rel_fc_127H_44, phospho_median_fc_46.9 = rel_fc_128L_46.9, phospho_median_fc_49.8 = rel_fc_128H_49.8, phospho_median_fc_52.9 = rel_fc_129L_52.9, phospho_median_fc_55.5 = rel_fc_129H_55.5, phospho_median_fc_58.6 = rel_fc_130L_58.6, phospho_median_fc_62 = rel_fc_130H_62, phospho_median_fc_66.3 = rel_fc_131L_66.3) nbf_median_fc_df <- chooseCoherentGeneNames( nbf_peptide_filtered, combo_mean_tm_pSite_anno_functional_scores) %>% group_by(gene_name, key, temperature) %>% summarize(median_rel_value = median(rel_value, na.rm = TRUE)) %>% ungroup %>% unite(combo_var, c(key, temperature), sep = "_") %>% spread(combo_var, median_rel_value) %>% dplyr::select(gene_name, unmodified_median_fc_37 = rel_fc_126_37, unmodified_median_fc_40.4 = rel_fc_127L_40.4, unmodified_median_fc_44 = rel_fc_127H_44, unmodified_median_fc_46.9 = rel_fc_128L_46.9, unmodified_median_fc_49.8 = rel_fc_128H_49.8, unmodified_median_fc_52.9 = rel_fc_129L_52.9, unmodified_median_fc_55.5 = rel_fc_129H_55.5, unmodified_median_fc_58.6 = rel_fc_130L_58.6, unmodified_median_fc_62 = rel_fc_130H_62, unmodified_median_fc_66.3 = rel_fc_131L_66.3) supp3_tab <- combo_mean_tm_pSite_anno_functional_scores %>% filter(!is.na(mean_nbf), !is.na(mean_phospho)) %>% dplyr::select(gene_name, Gene_pSite, mod_sequence, Tm_mean_phospho = mean_phospho, Tm_mean_unmodified = mean_nbf, mean_delta_meltPoint, significant, functional_score, Tm_phospho_rep1 = `1.x`, Tm_phospho_rep2 = `2.x`, Tm_phospho_rep3 = `3.x`, Tm_phospho_rep4 = `4.x`, Tm_phospho_rep5 = `5.x`, Tm_unmodified_rep1 = `1.y`, Tm_unmodified_rep2 = `2.y`, Tm_unmodified_rep3 = `3.y`, Tm_unmodified_rep4 = `4.y`, Tm_unmodified_rep5 = `5.y`, delta_meltPoint_1, delta_meltPoint_2, delta_meltPoint_3, delta_meltPoint_4, delta_meltPoint_5, p_adj_1, p_adj_2, p_adj_3, p_adj_4, p_adj_5) %>% left_join(phospho_median_fc_df, by = c("gene_name", "mod_sequence")) %>% left_join(nbf_median_fc_df, by = c("gene_name")) write_csv(supp3_tab, path = "phosphoTPP-supplementary_data3-supplementary_table_phospho_non-modified_comparisons.csv")
\renewcommand{\figurename}{\textbf{Supplementary figure}}
lowerFn <- function(data, mapping, dot.size) { p <- ggplot(data = data, mapping = mapping) + geom_point(alpha = 0.15, size = 0.5) + geom_line(aes(x,y), data = tibble(x = 40:70, y = 40:70), color = "darkgray") + ggpmisc::stat_poly_eq(formula = y ~ x, parse = TRUE, size = 2.5) + theme_bw() + theme(axis.text = element_text(size = 6)) p } lowerFnReg <- function(data, mapping, dot.size) { p <- ggplot(data = data, mapping = mapping) + geom_point(alpha = 0.15) + geom_line(aes(x,y), data = tibble(x = 40:70, y = 40:70), color = "darkgray") + ggpmisc::stat_poly_eq(formula = y ~ x, parse = TRUE) + theme_bw() p } technical_rep_theme <- theme( panel.background = element_rect(fill = "lightblue", colour = "lightblue", size = 0.5, linetype = "solid"))
huang_data_combo <- left_join( huang_phospho_hq %>% dplyr::select(uniprot_id = `Uniprot Accession`, Gene_pSite, HighQuality, p_value = deltaTmPval, average_tm_phospho = AveragePhosphoTm) %>% filter(HighQuality == "Yes") %>% mutate(average_tm_phospho = as.numeric(average_tm_phospho), p_value = as.numeric(p_value)), huang_bulk, by = "uniprot_id") ggplot(huang_data_combo, aes(average_tm_nbf, average_tm_phospho)) + geom_point(shape = 16, alpha = 0.2) + geom_line(aes(x,y), data = tibble(x = 42.5:65, y = 42.5:65), color = "darkgray") + coord_fixed(xlim = c(42.5, 65), ylim = c(42.5, 65)) + ggpmisc::stat_poly_eq(formula = y ~ x, parse = TRUE) + labs(x = expression('T'[m]^unmodified* ' ' * '('*~degree*C*')'), y = expression('T'[m]^phospho* ' ' * '('*~degree*C*')')) + theme_bw()
huang_unmod_mat_numeric <- data.frame( apply(huang_raw_df[,2:13], 2, as.numeric)) huang_unmod_pm <- ggpairs(huang_unmod_mat_numeric, upper = "blank", diag = NULL, lower = list(continuous = wrap(lowerFn))) ggmatrix(list(huang_unmod_pm[2, 1] + technical_rep_theme, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, huang_unmod_pm[3, 1], huang_unmod_pm[3,2], NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, huang_unmod_pm[4,1], huang_unmod_pm[4,2], huang_unmod_pm[4,3] + technical_rep_theme, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, huang_unmod_pm[5,1], huang_unmod_pm[5,2], huang_unmod_pm[5,3], huang_unmod_pm[5,4], NULL, NULL, NULL, NULL, NULL, NULL, NULL, huang_unmod_pm[6,1], huang_unmod_pm[6,2], huang_unmod_pm[6,3], huang_unmod_pm[6,4], huang_unmod_pm[6,5]+ technical_rep_theme, NULL, NULL, NULL, NULL, NULL, NULL, huang_unmod_pm[7,1], huang_unmod_pm[7,2], huang_unmod_pm[7,3], huang_unmod_pm[7,4], huang_unmod_pm[7,5], huang_unmod_pm[7,6], NULL, NULL, NULL, NULL, NULL, huang_unmod_pm[8,1], huang_unmod_pm[8,2], huang_unmod_pm[8,3], huang_unmod_pm[8,4], huang_unmod_pm[8,5], huang_unmod_pm[8,6], huang_unmod_pm[8,7]+ technical_rep_theme, NULL, NULL, NULL, NULL, huang_unmod_pm[9,1], huang_unmod_pm[9,2], huang_unmod_pm[9,3], huang_unmod_pm[9,4], huang_unmod_pm[9,5], huang_unmod_pm[9,6], huang_unmod_pm[9,7], huang_unmod_pm[9,8], NULL, NULL, NULL, huang_unmod_pm[10,1], huang_unmod_pm[10,2], huang_unmod_pm[10,3], huang_unmod_pm[10,4], huang_unmod_pm[10,5], huang_unmod_pm[10,6], huang_unmod_pm[10,7], huang_unmod_pm[10,8], huang_unmod_pm[10,9] + technical_rep_theme, NULL, NULL, huang_unmod_pm[11,1], huang_unmod_pm[11,2], huang_unmod_pm[11,3], huang_unmod_pm[11,4], huang_unmod_pm[11,5], huang_unmod_pm[11,6], huang_unmod_pm[11,7], huang_unmod_pm[11,8], huang_unmod_pm[11,9], huang_unmod_pm[11,10], NULL, huang_unmod_pm[12,1], huang_unmod_pm[12,2], huang_unmod_pm[12,3], huang_unmod_pm[12,4], huang_unmod_pm[12,5], huang_unmod_pm[12,6], huang_unmod_pm[12,7], huang_unmod_pm[12,8], huang_unmod_pm[12,9], huang_unmod_pm[12,10], huang_unmod_pm[12,11] + technical_rep_theme), 11, 11, xAxisLabels = c("Tm rep1_1", "Tm rep1_2", "Tm rep2_1", "Tm rep2_2", "Tm rep3_1", "Tm rep3_2", "Tm rep4_1", "Tm rep4_2" ,"Tm rep5_1", "Tm rep5_2", "Tm rep6_1"), yAxisLabels = c("Tm rep1_2", "Tm rep2_1", "Tm rep2_2", "Tm rep3_1", "Tm rep3_2", "Tm rep4_1", "Tm rep4_2", "Tm rep5_1", "Tm rep5_2", "Tm rep6_1", "Tm rep6_2")) + theme(strip.text = element_text(size = 6))
huang_mat_numeric <- data.frame( apply(huang_phospho_hq[,5:14], 2, as.numeric)) huang_pm <- ggpairs(huang_mat_numeric, upper = "blank", diag = NULL, lower = list(continuous = wrap(lowerFn))) ggmatrix(list(huang_pm[2, 1] + technical_rep_theme, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, huang_pm[3, 1], huang_pm[3,2], NULL, NULL, NULL, NULL, NULL, NULL, NULL, huang_pm[4,1], huang_pm[4,2], huang_pm[4,3] + technical_rep_theme, NULL, NULL, NULL, NULL, NULL, NULL, huang_pm[5,1], huang_pm[5,2], huang_pm[5,3], huang_pm[5,4], NULL, NULL, NULL, NULL, NULL, huang_pm[6,1], huang_pm[6,2], huang_pm[6,3], huang_pm[6,4], huang_pm[6,5]+ technical_rep_theme, NULL, NULL, NULL, NULL, huang_pm[7,1], huang_pm[7,2], huang_pm[7,3], huang_pm[7,4], huang_pm[7,5], huang_pm[7,6], NULL, NULL, NULL, huang_pm[8,1], huang_pm[8,2], huang_pm[8,3], huang_pm[8,4], huang_pm[8,5], huang_pm[8,6], huang_pm[8,7]+ technical_rep_theme, NULL, NULL, huang_pm[9,1], huang_pm[9,2], huang_pm[9,3], huang_pm[9,4], huang_pm[9,5], huang_pm[9,6], huang_pm[9,7], huang_pm[9,8], NULL, huang_pm[10,1], huang_pm[10,2], huang_pm[10,3], huang_pm[10,4], huang_pm[10,5], huang_pm[10,6], huang_pm[10,7], huang_pm[10,8], huang_pm[10,9] + technical_rep_theme), 9, 9, xAxisLabels = c("Tm rep1_1", "Tm rep1_2", "Tm rep2_1", "Tm rep2_2", "Tm rep3_1", "Tm rep3_2", "Tm rep4_1", "Tm rep4_2" ,"Tm rep5_1"), yAxisLabels = c("Tm rep1_2", "Tm rep2_1", "Tm rep2_2", "Tm rep3_1", "Tm rep3_2", "Tm rep4_1", "Tm rep4_2", "Tm rep5_1", "Tm rep5_2")) + theme(strip.text = element_text(size = 6))
pm <- ggpairs(nbf_tm_rep_df, 2:6, upper = "blank", diag = NULL, lower = list(continuous = wrap(lowerFnReg))) ggmatrix(list(pm[2, 1], NULL, NULL, NULL, pm[3, 1], pm[3,2], NULL, NULL, pm[4, 1], pm[4,2], pm[4,3], NULL, pm[5, 1], pm[5,2], pm[5,3], pm[5,4]), 4, 4, xAxisLabels = c("Tm rep1", "Tm rep2", "Tm rep3", "Tm rep4"), yAxisLabels = c("Tm rep2", "Tm rep3","Tm rep4","Tm rep5"))
pm_phospho <- ggpairs(phospho_tm_rep_df, 5:9, upper = "blank", diag = NULL, lower = list(continuous = wrap(lowerFnReg))) ggmatrix(list(pm_phospho[2, 1], NULL, NULL, NULL, pm_phospho[3, 1], pm_phospho[3,2], NULL, NULL, pm_phospho[4, 1], pm_phospho[4,2], pm_phospho[4,3], NULL, pm_phospho[5, 1], pm_phospho[5,2], pm_phospho[5,3], pm_phospho[5,4]), 4, 4, xAxisLabels = c("Tm rep1", "Tm rep2", "Tm rep3", "Tm rep4"), yAxisLabels = c("Tm rep2", "Tm rep3","Tm rep4","Tm rep5"))
colnames(huang_phospho_hq)[19] <- "deltaTm" delta_tm_df <- bind_rows( huang_phospho_hq %>% mutate(delta_tm = as.numeric(deltaTm)) %>% dplyr::select(delta_tm) %>% mutate(dataset = "Huang et al."), combo_mean_tm_significant_df %>% rowwise() %>% mutate(mean_delta_meltPoint = mean(c(delta_meltPoint_1, delta_meltPoint_2, delta_meltPoint_3, delta_meltPoint_4, delta_meltPoint_5), na.rm = TRUE)) %>% ungroup %>% dplyr::select(delta_tm = mean_delta_meltPoint) %>% mutate(dataset = "This study") ) ggplot(delta_tm_df, aes(delta_tm, fill = dataset)) + geom_density(alpha = 0.25) + xlab(expression(Delta*'T'[m])) + theme_bw() + theme(legend.position = "bottom")
mxq_unmod_df_huang <- read_delim("../dat/Huang_nonphospho_evidence.txt", delim = "\t") mxq_unmod_df_potel <- read_delim("../dat/Potel_nonphospho_evidence.txt", delim = "\t") mxq_phospho_df_huang <- read_delim("../dat/Huang_phospho_evidence.txt", delim = "\t") mxq_phospho_df_potel <- read_delim("../dat/Potel_phospho_evidence.txt", delim = "\t") mxq_combo_df <- bind_rows( mxq_unmod_df_huang %>% dplyr::select(PIF) %>% mutate(dataset = "Huang et al. non-modified", PIF = as.numeric(PIF)), mxq_phospho_df_huang %>% dplyr::select(PIF) %>% mutate(dataset = "Huang et al. enriched phosphopeptides", PIF = as.numeric(PIF)), mxq_unmod_df_potel %>% dplyr::select(PIF) %>% mutate(dataset = "This study non-modified", PIF = as.numeric(PIF)), mxq_phospho_df_potel %>% dplyr::select(PIF) %>% mutate(dataset = "This study enriched phosphopeptides", PIF = as.numeric(PIF)) ) ggplot(mxq_combo_df, aes(dataset, PIF)) + geom_violin(alpha = 0.25, fill = "gray", color = "gray") + geom_boxplot(width = 0.15, outlier.shape = NA) + theme_bw() + labs(x = "") + theme(legend.position = "bottom", axis.text.x = element_text(angle = 90, hjust =1))
nbf_mean_tm_df <- nbf_tm_rep_fil %>% ungroup %>% rowwise() %>% mutate(mean_Tm = mean(c(`1`, `2`, `3`, `4`, `5`), na.rm = TRUE)) %>% ungroup %>% dplyr::select(gene_name, mean_Tm) huang_mapping <- read_delim("../dat/huang_mapping.txt", delim = "\t") huang_unmod_mean_tm_df <- huang_bulk %>% left_join(huang_mapping %>% dplyr::select(uniprot_id = From, gene_name = To), by = "uniprot_id") %>% dplyr::select(gene_name, huang_mean_tm = average_tm_nbf) %>% group_by(gene_name) %>% summarize(huang_mean_tm = mean(huang_mean_tm, na.rm = TRUE)) %>% ungroup %>% filter(!is.nan(huang_mean_tm)) tan_hq_meltCurve_param_df <- readRDS("../dat/tan_hq_meltCurve_param_df.rds") %>% dplyr::select(gene_name, tan_Tm = meltPoint) becher_cell_hela <- read_xlsx("../dat/Becher-s2.0-S0092867418303854-mmc4-TableS4-HeLa_G1S.xlsx", sheet = 2) %>% dplyr::select(gene_name, becher_cell_hela_mean_Tm = Tm.median) %>% mutate(becher_cell_hela_mean_Tm = as.numeric(becher_cell_hela_mean_Tm)) combo_tm_df <- tan_hq_meltCurve_param_df %>% full_join(becher_cell_hela, by = "gene_name") %>% full_join(nbf_mean_tm_df, by = "gene_name") %>% full_join(huang_unmod_mean_tm_df, by = "gene_name") pm_others <- ggpairs(combo_tm_df, 2:5, upper = "blank", diag = NULL, lower = list(continuous = wrap(lowerFnReg))) ggmatrix(list(pm_others[2, 1], NULL, NULL, pm_others[3, 1], pm_others[3,2], NULL, pm_others[4, 1], pm_others[4,2], pm_others[4,3]), 3, 3, xAxisLabels = c("Tan et al. (K562)", "Becher et al. (HeLa)", "This study (HeLa)"), yAxisLabels = c("Becher et al. (HeLa)", "This study (HeLa)", "Huang et al. (HEK293T)")) + theme(strip.text = element_text(size = 8))
Abundance vs. delta Tm comparison
correctTop3ForTpp <- function(df, ref_channel = "signal_sum_126", sig_string = "signal_sum"){ total_signal_sum <- sum( as.matrix((df %>% dplyr::select(matches(sig_string))))[1,] ) ref_2_total_frac <- as.vector((df %>% dplyr::select(matches(ref_channel)))[1,]) / total_signal_sum df_out <- df %>% dplyr::select(protein_id, gene_name, top3) %>% mutate(top3Corrected = top3 * ref_2_total_frac) %>% dplyr::select(-top3) return(df_out) } combo_corrected_top3_nbf <- bind_rows(lapply(nbf_protein_tabs, function(tab){ tab_fil <- filter(tab, qupm > 0, !grepl("##", protein_id)) per_tab_df <- bind_rows(lapply(seq(nrow(tab_fil)), function(i){ correctTop3ForTpp(df = tab_fil[i,]) %>% mutate(replicate = i) })) return(per_tab_df) }))
combo_corrected_top3_nbf <- readRDS("../prerun/combo_corrected_top3_nbf.rds")
combo_corrected_mean_top3_nbf <- combo_corrected_top3_nbf %>% group_by(gene_name) %>% summarize(mean_top3_corrected = mean(top3Corrected$signal_sum_126, na.rm = TRUE)) mean_deltaTm_df <- combo_mean_tm_pSite_anno %>% rowwise() %>% mutate(mean_delta_meltPoint = -mean( c(delta_meltPoint_1, delta_meltPoint_2, delta_meltPoint_3, delta_meltPoint_4, delta_meltPoint_5), na.rm = TRUE)) %>% ungroup %>% dplyr::select(gene_name, Gene_pSite, mod_sequence, phospho_site_STY, mean_delta_meltPoint, significant) corrected_mean_top3_deltaTm_df <- left_join( mean_deltaTm_df, chooseCoherentGeneNames( combo_corrected_mean_top3_nbf, mean_deltaTm_df), by = "gene_name" ) ggplot(corrected_mean_top3_deltaTm_df, aes(mean_top3_corrected, mean_delta_meltPoint)) + geom_point(alpha = 0.25) + geom_point(color = "red", data = filter(corrected_mean_top3_deltaTm_df, significant)) + labs(x = "Protein abundance (top3)", y = "delta Tm") + theme_bw()
Delta Tm comparison between the two studies
delta_tm <- left_join( mean_deltaTm_df %>% dplyr::select(Gene_pSite, mean_delta_meltPoint, significant), huang_phospho_hq_potel_anno %>% mutate(mean_delta_meltPoint_huang = average_tm_phospho - average_tm_nbf) %>% dplyr::select(Gene_pSite, mean_delta_meltPoint_huang, p_value), by = "Gene_pSite" ) %>% na.omit() ggplot(delta_tm, aes(mean_delta_meltPoint_huang, mean_delta_meltPoint)) + geom_point(alpha = 0.5, shape = 16) + geom_point(data = filter(delta_tm, significant), color = "#b2182b", shape = 16) + geom_point(data = filter(delta_tm, p_value < 0.05), color = "#1f78b4", shape = 16) + geom_point(data = filter(delta_tm, p_value < 0.05, significant), color = "#ff7f00", shape = 16) + geom_smooth(method = "lm", color = "darkgray", size = 1) + ggrepel::geom_text_repel(aes(label = Gene_pSite), data = filter(delta_tm, p_value < 0.05, significant)) + ggpmisc::stat_poly_eq(formula = y ~ x, parse = TRUE) + labs(x = expression(Delta*' T'[m]^Potel*' '* '('*~degree*C*')'), y = expression(Delta* ' T'[m]^Huang*' '* '('*~degree*C*')')) + theme_bw()
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.