knitr::opts_chunk$set(echo = TRUE)
library(phosphoTPP)
# set thresholds for quality filtering s2i_thres <- 0.5 p2t_thres <- 4 mascot_score_thres <- 20 fdr_at_score_thres <- 0.01 r2_thres <- 0.8 plateau_thres <- 0.2
# load config tables cfg_tab <- readxl::read_xlsx(file.path("../dat", "TPP-TR_config_phospho.xlsx")) cfg_tab %>% dplyr::select(-Path)
# set plotting sizes anno_size <- 2 pval_size <- 2 rsq_size <- 2 # define plotting theme theme_paper <- theme(axis.title = element_text(size = 8), axis.text = element_text(size = 6), legend.text = element_text(size = 6), title = element_text(size = 9))
temperature_anno <- cfg_tab %>% dplyr::select(matches('[0-9]{3}')) %>% gather(key, temperature) %>% mutate(channel = paste("sig", key, sep = "_")) %>% mutate(fc_channel = paste("rel_fc", key, sep = "_")) temperature_anno_prot <- cfg_tab %>% dplyr::select(matches('[0-9]{3}')) %>% gather(key, temperature) %>% mutate(channel = paste("signal_sum", key, sep = "_")) %>% mutate(fc_channel = paste("rel_fc", key, sep = "_"))
# read in lists of phospho and nbt protein and peptides tables per replicate phospho_protein_files <- list.files("../dat", pattern = 'phospho.+protein', full.names = TRUE) phospho_protein_files
nbf_protein_files <- list.files("../dat", pattern = 'NBF.+protein', full.names = TRUE) nbf_protein_files
phospho_peptide_files <- list.files("../dat", pattern = 'phospho.+peptide.+txt', full.names = TRUE) phospho_peptide_files
phosho_mxq_peptide_files <- list.files("../dat", pattern = 'pTPP.+txt', full.names = TRUE) phosho_mxq_peptide_files
nbf_peptide_files <- list.files("../dat", pattern = 'NBF.+peptide', full.names = TRUE) nbf_peptide_files
# read data files into lists of replicates phospho_protein_tabs <- lapply(seq_len(length(phospho_protein_files)), function(file_i){ read_delim(phospho_protein_files[file_i], delim = "\t") %>% mutate(replicate = file_i) }) nbf_protein_tabs <- lapply(seq_len(length(nbf_protein_files)), function(file_i){ read_delim(nbf_protein_files[file_i], delim = "\t") %>% mutate(replicate = file_i) }) phospho_peptide_tabs <- lapply(seq_len(length(phospho_peptide_files)), function(file_i){ ibq_tab <- read_delim(phospho_peptide_files[file_i], delim = "\t") %>% filter_at(vars(starts_with("sig_")), all_vars(!is.na(.))) mxq_tab <- read_delim(phosho_mxq_peptide_files[file_i], delim = "\t") combined_tab <- left_join( ibq_tab %>% dplyr::select(protein_id, sequence, modifications, matches("sig_"), s2i, p2t, msms_id), mxq_tab %>% dplyr::select(protein_id_mxq = Proteins, sequence = Sequence, msms_id = `MS/MS scan number`, mod_sequence = `Modified sequence`, phospho_prob = `Phospho (STY) Probabilities`, phospho_score_diff = `Phospho (STY) Score Diffs`, phospho_site_STY = `Phospho (STY)`), by = c("sequence", "msms_id")) %>% filter(!is.na(protein_id_mxq)) %>% group_by(sequence, msms_id) %>% filter(!duplicated(mod_sequence)) %>% ungroup %>% filter(getProb(phospho_prob)) %>% filter(p2t >= p2t_thres, s2i >= s2i_thres) %>% mutate(replicate = file_i) %>% mutate(rel_fc_126 = sig_126 / sig_126, rel_fc_127L = sig_127L / sig_126, rel_fc_127H = sig_127H / sig_126, rel_fc_128L = sig_128L / sig_126, rel_fc_128H = sig_128H / sig_126, rel_fc_129L = sig_129L / sig_126, rel_fc_129H = sig_129H / sig_126, rel_fc_130L = sig_130L / sig_126, rel_fc_130H = sig_130H / sig_126, rel_fc_131L = sig_131L / sig_126) return(combined_tab) }) phospho_non_phospho_peptide_tabs <- lapply(seq_len(length(phospho_peptide_files)), function(file_i){ read_delim(phospho_peptide_files[file_i], delim = "\t") %>% filter(is_decoy == 0, score > mascot_score_thres & is_unique == 1, rank == 1, fdr_at_score < fdr_at_score_thres, !is.na(modifications), grepl("TMT", modifications), !grepl("Phospho", modifications), in_quantification_of_protein == 1, !grepl("##", protein_id)) %>% filter_at(vars(starts_with("sig_")), all_vars(!is.na(.))) %>% mutate(replicate = file_i) %>% mutate(rel_fc_126 = sig_126 / sig_126, rel_fc_127L = sig_127L / sig_126, rel_fc_127H = sig_127H / sig_126, rel_fc_128L = sig_128L / sig_126, rel_fc_128H = sig_128H / sig_126, rel_fc_129L = sig_129L / sig_126, rel_fc_129H = sig_129H / sig_126, rel_fc_130L = sig_130L / sig_126, rel_fc_130H = sig_130H / sig_126, rel_fc_131L = sig_131L / sig_126) }) nbf_peptide_tabs <- lapply(seq_len(length(nbf_peptide_files)), function(file_i){ read_delim(nbf_peptide_files[file_i], delim = "\t") %>% filter(is_decoy == 0, score > mascot_score_thres & is_unique == 1, rank == 1, fdr_at_score < fdr_at_score_thres, !is.na(modifications), grepl("TMT", modifications), in_quantification_of_protein == 1, !grepl("##", protein_id)) %>% filter_at(vars(starts_with("sig_")), all_vars(!is.na(.))) %>% mutate(replicate = file_i) %>% mutate(rel_fc_126 = sig_126 / sig_126, rel_fc_127L = sig_127L / sig_126, rel_fc_127H = sig_127H / sig_126, rel_fc_128L = sig_128L / sig_126, rel_fc_128H = sig_128H / sig_126, rel_fc_129L = sig_129L / sig_126, rel_fc_129H = sig_129H / sig_126, rel_fc_130L = sig_130L / sig_126, rel_fc_130H = sig_130H / sig_126, rel_fc_131L = sig_131L / sig_126) })
# compute normalization factors ## first compute median normalization factors for shared non-phospho peptides between ## matching phospho-enriched and nbf samples n_rep <- min(length(phospho_peptide_files), length(nbf_peptide_files)) per_exp_norm_factors <- lapply(seq_len(n_rep), function(rep_i){ norm_in_df <- bind_rows(nbf_peptide_tabs[[rep_i]] %>% mutate(dataset_id = paste("nbf", replicate, sep = "_")), phospho_non_phospho_peptide_tabs[[rep_i]] %>% mutate(dataset_id = paste("phospho", replicate, sep = "_"))) nbf_peptide_norm_factors <- getNormFactors4Data( pep_tab = norm_in_df, temperature_anno = temperature_anno, filter_criteria = list("rel_fc_127H_low" = 0, "rel_fc_129H_low" = 0.4, "rel_fc_129H_high" = 0.6, "rel_fc_130H_low" = 0, "rel_fc_130H_high" = 0.3, "rel_fc_131L_low" = 0, "rel_fc_131L_high" = 0.2), n_rep = 2) median_norm_factors <- nbf_peptide_norm_factors out_df <- tibble( temperature = nbf_peptide_norm_factors[[1]]$temperature, median_norm_factor = nbf_peptide_norm_factors[[1]]$median_rel_value / nbf_peptide_norm_factors[[2]]$median_rel_value) return(out_df) }) per_exp_norm_factors
## now compute normalizing factors retrieved from curves fitted on hq data shared ## between different nbf replicates norm_in_df <- bind_rows(nbf_peptide_tabs) %>% mutate(dataset_id = paste("nbf", replicate, sep = "_"))#, nbf_peptide_norm_factors <- getNormFactors4Data( pep_tab = norm_in_df, temperature_anno = temperature_anno, filter_criteria = list("rel_fc_127H_low" = 1, "rel_fc_129H_low" = 0.4, "rel_fc_129H_high" = 0.6, "rel_fc_130H_low" = 0, "rel_fc_130H_high" = 0.3, "rel_fc_131L_low" = 0, "rel_fc_131L_high" = 0.2), n_rep = n_rep) nbf_peptide_norm_factors
# retrieve one table of per normalized nbf/phospho peptide fold changes # and compare to non-normalized one nbf_peptide_filtered_not_norm <- bind_rows(lapply(seq_len(length(nbf_peptide_tabs)), function(tab_i){ nbf_peptide_tabs[[tab_i]] %>% dplyr::select(protein_id, sequence, modifications, matches("rel_fc_"), replicate, score) %>% gather(key, value, -protein_id, -sequence, -modifications, -replicate, -score) %>% group_by(protein_id, sequence, replicate, modifications, key) %>% filter(score == max(score)) %>% left_join(temperature_anno, by = c("key" = "fc_channel")) %>% mutate(rel_value = value * 1) %>% left_join(nbf_protein_tabs[[tab_i]] %>% dplyr::select(protein_id, gene_name), by = c("protein_id")) })) nbf_peptide_filtered <- bind_rows(lapply(seq_len(length(nbf_peptide_tabs)), function(tab_i){ nbf_peptide_tabs[[tab_i]] %>% dplyr::select(protein_id, sequence, modifications, matches("rel_fc_"), replicate, score) %>% gather(key, value, -protein_id, -sequence, -modifications, -replicate, -score) %>% group_by(protein_id, sequence, replicate, modifications, key) %>% filter(score == max(score)) %>% left_join(temperature_anno, by = c("key" = "fc_channel")) %>% left_join(nbf_peptide_norm_factors[[tab_i]], by = "temperature") %>% mutate(rel_value = value * norm_factor) %>% left_join(nbf_protein_tabs[[tab_i]] %>% dplyr::select(protein_id, gene_name), by = c("protein_id")) })) phospho_peptide_tab_filtered_not_norm <- bind_rows(lapply(seq_len(length(phospho_peptide_tabs)), function(tab_i){ phospho_peptide_tabs[[tab_i]] %>% dplyr::select(-matches("sig_")) %>% gather(key, value, matches("rel_fc_")) %>% group_by(protein_id, sequence, mod_sequence, phospho_site_STY, replicate, key) %>% summarise(value = mean(value, na.rm = TRUE), mean_s2i = mean(s2i, na.rm = TRUE), mean_p2t = mean(p2t, na.rm = TRUE)) %>% ungroup %>% left_join(temperature_anno %>% dplyr::select(-key), by = c("key" = "fc_channel")) %>% mutate(rel_value = value * 1) %>% left_join(phospho_protein_tabs[[tab_i]] %>% dplyr::select(protein_id, gene_name), by = c("protein_id")) %>% filter(!grepl("##", protein_id)) })) phospho_peptide_tab_filtered <- bind_rows(lapply(seq_len(length(phospho_peptide_tabs)), function(tab_i){ phospho_peptide_tabs[[tab_i]] %>% dplyr::select(-matches("sig_")) %>% gather(key, value, matches("rel_fc_")) %>% group_by(protein_id, sequence, mod_sequence, phospho_site_STY, replicate, key) %>% summarise(value = mean(value, na.rm = TRUE), mean_s2i = mean(s2i, na.rm = TRUE), mean_p2t = mean(p2t, na.rm = TRUE)) %>% ungroup %>% left_join(temperature_anno %>% dplyr::select(-key), by = c("key" = "fc_channel")) %>% left_join(per_exp_norm_factors[[tab_i]], by = "temperature") %>% left_join(nbf_peptide_norm_factors[[tab_i]], by = "temperature") %>% mutate(rel_value = value * median_norm_factor * norm_factor) %>% left_join(phospho_protein_tabs[[tab_i]] %>% dplyr::select(protein_id, gene_name), by = c("protein_id")) %>% filter(!grepl("##", protein_id)) })) p_qc1_1 <- nbf_peptide_filtered_not_norm %>% ggplot(aes(temperature, rel_value)) + geom_boxplot(aes(group = temperature)) + facet_wrap(~replicate) + coord_cartesian(ylim = c(0, 2.5)) p_qc1_3 <- nbf_peptide_filtered %>% ggplot(aes(temperature, rel_value)) + geom_boxplot(aes(group = temperature)) + facet_wrap(~replicate) + coord_cartesian(ylim = c(0, 2.5)) p_qc1_2 <- phospho_peptide_tab_filtered_not_norm %>% ggplot(aes(temperature, rel_value)) + geom_boxplot(aes(group = temperature)) + facet_wrap(~replicate) + coord_cartesian(ylim = c(0, 2.5)) p_qc1_4 <- phospho_peptide_tab_filtered %>% ggplot(aes(temperature, rel_value)) + geom_boxplot(aes(group = temperature)) + facet_wrap(~replicate) + coord_cartesian(ylim = c(0, 2.5)) cowplot::plot_grid(p_qc1_1 + ggtitle("raw nbf"), p_qc1_2 + ggtitle("raw phospho"), p_qc1_3 + ggtitle("curve-based norm nbf"), p_qc1_4 + ggtitle("curve-based norm phospho, whith prior median norm by non-phospho peptides"), ncol = 2)
# fit curves to nbf dataset out_nbf_df <- nbf_peptide_filtered %>% group_by(gene_name, replicate) %>% do({ suppressWarnings(fitMeltcurveModelAndEval(df = .)) }) %>% ungroup # filter for high quality fits nbf_tm_rep_df <- out_nbf_df %>% filter(rSq > r2_thres, plateau < plateau_thres) %>% dplyr::select(gene_name, replicate, meltPoint) %>% spread(replicate, meltPoint) pm <- GGally::ggpairs(nbf_tm_rep_df, 2:6, upper = "blank", diag = NULL, lower = list(continuous = wrap(lowerFn))) GGally::ggmatrix(list(pm[2, 1], NULL, NULL, NULL, pm[3, 1], pm[3,2], NULL, NULL, pm[4, 1], pm[4,2], pm[4,3], NULL, pm[5, 1], pm[5,2], pm[5,3], pm[5,4]), 4, 4, xAxisLabels = c("Tm rep1", "Tm rep2", "Tm rep3", "Tm rep4"), yAxisLabels = c("Tm rep2", "Tm rep3","Tm rep4","Tm rep5"))
# fit curves to not-normalized nbf dataset out_nbf_nn_df <- nbf_peptide_filtered_not_norm %>% group_by(gene_name, replicate) %>% do({ suppressWarnings(fitMeltcurveModelAndEval(df = .)) }) %>% ungroup # filter for high quality fits nbf_tm_rep_nn_df <- out_nbf_nn_df %>% filter(rSq > r2_thres, plateau < plateau_thres) %>% dplyr::select(gene_name, replicate, meltPoint) %>% spread(replicate, meltPoint) pm_nn <- GGally::ggpairs(nbf_tm_rep_nn_df, 2:6, upper = "blank", diag = NULL, lower = list(continuous = wrap(lowerFn))) GGally::ggmatrix(list(pm_nn[2, 1], NULL, NULL, NULL, pm_nn[3, 1], pm_nn[3,2], NULL, NULL, pm_nn[4, 1], pm_nn[4,2], pm_nn[4,3], NULL, pm_nn[5, 1], pm_nn[5,2], pm_nn[5,3], pm_nn[5,4]), 4, 4, xAxisLabels = c("Tm rep1", "Tm rep2", "Tm rep3", "Tm rep4"), yAxisLabels = c("Tm rep2", "Tm rep3","Tm rep4","Tm rep5"))
# fit phospho peptide melting curves out_phospho_df <- phospho_peptide_tab_filtered %>% dplyr::select(protein_id, gene_name, sequence, mod_sequence, mean_s2i, mean_p2t, phospho_site_STY, replicate, rel_value, temperature) %>% group_by(protein_id, gene_name, sequence, mod_sequence, mean_s2i, mean_p2t, phospho_site_STY, replicate) %>% do({ suppressWarnings(fitMeltcurveModelAndEval(df = .)) }) %>% ungroup phospho_tm_rep_df <- out_phospho_df %>% filter(rSq > r2_thres, plateau < plateau_thres) %>% dplyr::select(gene_name, sequence, mod_sequence, phospho_site_STY, replicate, meltPoint) %>% spread(replicate, meltPoint) pm_phospho <- GGally::ggpairs(phospho_tm_rep_df, 5:9, upper = "blank", diag = NULL, lower = list(continuous = wrap(lowerFn))) GGally::ggmatrix(list(pm_phospho[2, 1], NULL, NULL, NULL, pm_phospho[3, 1], pm_phospho[3,2], NULL, NULL, pm_phospho[4, 1], pm_phospho[4,2], pm_phospho[4,3], NULL, pm_phospho[5, 1], pm_phospho[5,2], pm_phospho[5,3], pm_phospho[5,4]), 4, 4, xAxisLabels = c("Tm rep1", "Tm rep2", "Tm rep3", "Tm rep4"), yAxisLabels = c("Tm rep2", "Tm rep3","Tm rep4","Tm rep5"))
# fit non-norm phospho peptide melting curves out_phospho_nn_df <- phospho_peptide_tab_filtered_not_norm %>% dplyr::select(protein_id, gene_name, sequence, mod_sequence, mean_s2i, mean_p2t, phospho_site_STY, replicate, rel_value, temperature) %>% group_by(protein_id, gene_name, sequence, mod_sequence, mean_s2i, mean_p2t, phospho_site_STY, replicate) %>% do({ suppressWarnings(fitMeltcurveModelAndEval(df = .)) }) %>% ungroup phospho_tm_rep_nn_df <- out_phospho_nn_df %>% filter(rSq > r2_thres, plateau < plateau_thres) %>% dplyr::select(gene_name, sequence, mod_sequence, phospho_site_STY, replicate, meltPoint) %>% spread(replicate, meltPoint) pm_phospho_nn <- GGally::ggpairs(phospho_tm_rep_nn_df, 5:9, upper = "blank", diag = NULL, lower = list(continuous = wrap(lowerFn))) GGally::ggmatrix(list(pm_phospho_nn[2, 1], NULL, NULL, NULL, pm_phospho_nn[3, 1], pm_phospho_nn[3,2], NULL, NULL, pm_phospho_nn[4, 1], pm_phospho_nn[4,2], pm_phospho_nn[4,3], NULL, pm_phospho_nn[5, 1], pm_phospho_nn[5,2], pm_phospho_nn[5,3], pm_phospho_nn[5,4]), 4, 4, xAxisLabels = c("Tm rep1", "Tm rep2", "Tm rep3", "Tm rep4"), yAxisLabels = c("Tm rep2", "Tm rep3","Tm rep4","Tm rep5"))
# create scatterplot of phospho and nbf Tm with non-normalized data phospho_combo_nn_df <- left_join( out_phospho_nn_df %>% filter(rSq > r2_thres, plateau < plateau_thres) %>% group_by(protein_id, gene_name, sequence, mod_sequence, phospho_site_STY) %>% summarise(meltPoint = mean(meltPoint, na.rm = TRUE), plateau = mean(plateau, na.rm = TRUE), rSq = mean(rSq, na.rm = TRUE), n_rep = n()) %>% ungroup, out_nbf_nn_df %>% group_by(gene_name) %>% filter(rSq > r2_thres, plateau < plateau_thres) %>% summarise(meltPoint = mean(meltPoint, na.rm = TRUE), plateau = mean(plateau, na.rm = TRUE), rSq = mean(rSq, na.rm = TRUE), n_rep = n()) %>% ungroup, by = c("gene_name"), suffix = c("_phospho", "_flowThrough")) ggplot(phospho_combo_nn_df, aes(meltPoint_flowThrough, meltPoint_phospho)) + geom_point(alpha = 0.25, shape = 16) + geom_line(aes(x,y), data = tibble(x = 40:77.5, y = 40:77.5), color = "darkgray") + ggpmisc::stat_poly_eq(formula = y ~ x, parse = TRUE) + coord_cartesian(xlim = c(40, 77.5), ylim = c(40, 77.5)) + labs(x = expression('Tm[nbf] ('*~degree*C*')'), y = expression('Tm[phospho] ('*~degree*C*')')) + theme_bw()
nbf_tm_rep_fil <- out_nbf_df %>% filter(rSq > r2_thres, plateau < plateau_thres) %>% group_by(gene_name) %>% filter(n() > 2) %>% # should be changed to > 2 if we have 5 replicates ungroup %>% dplyr::select(gene_name, replicate, meltPoint) %>% spread(replicate, meltPoint) phospho_tm_rep_fil <- out_phospho_df %>% filter(rSq > r2_thres, plateau < plateau_thres) %>% group_by(gene_name, sequence, mod_sequence, phospho_site_STY) %>% filter(n() > 2) %>% # should be changed to > 2 if we have 5 replicates ungroup() %>% dplyr::select(gene_name, sequence, mod_sequence, phospho_site_STY, replicate, meltPoint) %>% spread(replicate, meltPoint) combo_tm_df <- left_join(phospho_tm_rep_fil, nbf_tm_rep_fil, by = "gene_name") %>% rowwise() %>% mutate(id = paste(gene_name, mod_sequence, phospho_site_STY, sep = "_")) %>% ungroup() combo_mean_tm_stringent_df <- combo_tm_df %>% rowwise() %>% mutate(mean_phospho = mean(c(`1.x`, `2.x`, `3.x`, `4.x`, `5.x`), na.rm = TRUE), mean_nbf = mean(c(`1.y`, `2.y`, `3.y`, `4.y`, `5.y`), na.rm = TRUE)) %>% ungroup() %>% rowwise() %>% mutate(delta_meltPoint_1 = `1.y` - `1.x`, delta_meltPoint_2 = `2.y` - `2.x`, delta_meltPoint_3 = `3.y` - `3.x`, delta_meltPoint_4 = `4.y` - `4.x`, delta_meltPoint_5 = `5.y` - `5.x`) %>% ungroup() %>% mutate(delta_meltPoint_1_scaled = scale(delta_meltPoint_1), delta_meltPoint_2_scaled = scale(delta_meltPoint_2), delta_meltPoint_3_scaled = scale(delta_meltPoint_3), delta_meltPoint_4_scaled = scale(delta_meltPoint_4), delta_meltPoint_5_scaled = scale(delta_meltPoint_5)) %>% mutate(p_value_1 = 1 - pnorm(abs(delta_meltPoint_1_scaled)), p_value_2 = 1 - pnorm(abs(delta_meltPoint_2_scaled)), p_value_3 = 1 - pnorm(abs(delta_meltPoint_3_scaled)), p_value_4 = 1 - pnorm(abs(delta_meltPoint_4_scaled)), p_value_5 = 1 - pnorm(abs(delta_meltPoint_5_scaled))) %>% ungroup() %>% mutate(p_adj_1 = p.adjust(p_value_1, method = "BH"), p_adj_2 = p.adjust(p_value_2, method = "BH"), p_adj_3 = p.adjust(p_value_3, method = "BH"), p_adj_4 = p.adjust(p_value_4, method = "BH"), p_adj_5 = p.adjust(p_value_5, method = "BH")) %>% rowwise() %>% mutate(significant = countAtLeast(x = c(p_adj_1, p_adj_2, p_adj_3, p_adj_4, p_adj_5), val_with_sign = c(delta_meltPoint_1_scaled, delta_meltPoint_2_scaled, delta_meltPoint_3_scaled, delta_meltPoint_4_scaled, delta_meltPoint_5_scaled), min_n = 2)) %>% ungroup()
# choose gene names to maximize overlap with Huang et al. ## load Huang et al. phospho dataset huang_phospho_tab <- readxl::read_xlsx(file.path("../dat", "41592_2019_499_MOESM4_ESM.xlsx"), skip = 2) huang_phospho_hq <- filter( huang_phospho_tab, `High Quality ΔTm? (Curve Fit R2 > 0.8, Localization FDR < 0.01, ΔTm P-value)` == "Yes") combo_mean_tm_fcs_choosen_gene_names <- chooseCoherentGeneNames(in_df = combo_mean_tm_fcs, their_data = huang_phospho_tab)
# annotate phospho sites disordered_anno <- read_delim( file.path("../dat", "all_uniprot_instrinsically_disordered.txt"), delim = "\t") %>% within(rank[grepl("not found", rank)] <- NA) %>% mutate(rank = as.numeric(rank)) gene_name_anno <- read_delim(file.path( "../dat", "disorder_protein_id_gene_name_mapping.txt"), delim = "\t") disordered_anno <- left_join( disordered_anno, gene_name_anno %>% dplyr::select(protein = From, gene_name = To), by = "protein" ) %>% group_by(gene_name) %>% filter(nchar(seq) == max(nchar(seq))) %>% filter(seq == seq[1]) %>% ungroup combo_mean_tm_pSite_anno <- left_join( combo_mean_tm_fcs_choosen_gene_names, disordered_anno %>% dplyr::select(gene_name, seq), by = "gene_name") %>% rowwise() %>% mutate(Gene_pSite = getPhosphoSiteId( gene_name = gene_name, string = mod_sequence, seq = seq)) %>% ungroup %>% mutate(significant_huang = Gene_pSite %in% filter(huang_phospho_hq, as.numeric(`ΔTm P-Value`) < 0.05)$Gene_pSite) p_1b <- ggplot(combo_mean_tm_pSite_anno, aes(mean_nbf, mean_phospho)) + geom_point(shape = 16, alpha = 0.2, size = 0.5) + geom_line(aes(x,y), data = tibble(x = 40:65, y = 40:65), color = "darkgray", size = 0.25) + geom_point(data = filter(combo_mean_tm_pSite_anno, significant), color = "#b2182b", shape = 16, size = 0.5) + geom_point(data = filter(combo_mean_tm_pSite_anno, significant_huang), color = "#1f78b4", shape = 16, size = 0.5) + geom_point(data = filter(combo_mean_tm_pSite_anno, significant_huang, significant), color = "#ff7f00", shape = 16, size = 0.5) + coord_cartesian(xlim = c(40, 65), ylim = c(40, 65)) + ggpmisc::stat_poly_eq(formula = y ~ x, parse = TRUE, size = rsq_size) + labs(x = expression('T'[m]^unmodified* ' '* '('*~degree*C*')'), y = expression('T'[m]^phospho* ' '* '('*~degree*C*')')) + theme_bw() + theme_paper
# annotate ochoa et al. functional scores ochoa_et_al_anno <- readxl::read_xlsx(file.path("../dat", "56035_2_data_set_478471_pxb8gk.xlsx")) ochoa_et_al_mapping <- read_delim(file.path("../dat", "ochoa_mapping.txt"), delim = "\t") ochoa_et_al_func_anno <- readxl::read_xlsx(file.path("../dat", "56035_2_data_set_478475_pxb8l1-tableS3.xlsx")) ochoa_et_al_func_anno_psite <- left_join( ochoa_et_al_func_anno, ochoa_et_al_anno, by = c("uniprot", "position") ) ochoa_et_al_anno_fil <- left_join( ochoa_et_al_func_anno_psite %>% dplyr::select(uniprot, position, residue, functional_score, is_disopred, isHotspot, isInterface), ochoa_et_al_mapping %>% dplyr::select(uniprot = From, gene_name = To), by = "uniprot") %>% mutate(Gene_pSite = paste0(gene_name, "_", "p", residue, position)) combo_mean_tm_pSite_anno_functional_scores <- left_join( combo_mean_tm_pSite_anno %>% rowwise() %>% mutate(mean_delta_meltPoint = mean(c(delta_meltPoint_1, delta_meltPoint_2, delta_meltPoint_3, delta_meltPoint_4, delta_meltPoint_5), na.rm = TRUE)) %>% ungroup, ochoa_et_al_anno_fil %>% na.omit %>% dplyr::select(Gene_pSite, functional_score, is_disopred, isHotspot, isInterface), by = "Gene_pSite")
huang_bulk <- readxl::read_xlsx(file.path("../dat", "41592_2019_499_MOESM3_ESM.xlsx"), skip = 2) %>% dplyr::select(uniprot_id = `Uniprot Accession`, average_tm_nbf = `Average Tm (°C)`) %>% mutate(average_tm_nbf = as.numeric(average_tm_nbf)) huang_phospho_hq_potel_anno <- huang_phospho_hq %>% dplyr::select(uniprot_id = `Uniprot Accession`, Gene_pSite, average_tm_phospho = `Average Phospho-Tm (°C)`, p_value = `ΔTm P-Value`) %>% mutate(significant_potel = Gene_pSite %in% filter(combo_mean_tm_pSite_anno_functional_scores, significant)$Gene_pSite) %>% mutate(average_tm_phospho = as.numeric(average_tm_phospho), p_value = as.numeric(p_value)) %>% left_join(huang_bulk, by = "uniprot_id") p_1a <- ggplot(huang_phospho_hq_potel_anno, aes(average_tm_nbf, average_tm_phospho)) + geom_point(shape = 16, alpha = 0.2, size = 0.5) + geom_line(aes(x,y), data = tibble(x = 40:65, y = 40:65), color = "darkgray", size = 0.25) + geom_point(data = filter(huang_phospho_hq_potel_anno, p_value < 0.05), color = "#1f78b4", size = 0.5, shape = 16) + geom_point(data = filter(huang_phospho_hq_potel_anno, significant_potel), color = "#b2182b", size = 0.5, shape = 16) + geom_point(data = filter(huang_phospho_hq_potel_anno, p_value < 0.05, significant_potel), color = "#ff7f00", size = 0.5, shape = 16) + coord_cartesian(xlim = c(40, 65), ylim = c(40, 65)) + ggpmisc::stat_poly_eq(formula = y ~ x, parse = TRUE, size = rsq_size) + labs(x = expression('T'[m]^unmodified*' '* '('*~degree*C*')'), y = expression('T'[m]^phospho*' '* '('*~degree*C*')')) + theme_bw() + theme_paper
# ochoa functional score on Huang et al data huang_phospho_hq_functional_score <- left_join( huang_phospho_hq %>% dplyr::select(Gene_pSite, average_deltaTm = `Average Phospho-Tm (°C)`, p_value = `ΔTm P-Value`) %>% mutate(average_deltaTm = as.numeric(average_deltaTm), p_value = as.numeric(p_value)) %>% mutate(significant = (p_value < 0.05)), ochoa_et_al_anno_fil %>% na.omit %>% dplyr::select(Gene_pSite, functional_score), by = "Gene_pSite") # combine both datasets to save space for plot func_score_combo_df <- bind_rows( filter(combo_mean_tm_pSite_anno_functional_scores, !is.na(functional_score)) %>% dplyr::select(Gene_pSite, significant, functional_score) %>% mutate(group = "This study"), filter(huang_phospho_hq_functional_score, !is.na(functional_score)) %>% dplyr::select(Gene_pSite, significant, functional_score)%>% mutate(group = "Huang et al.") ) p_1c <- ggplot(func_score_combo_df , aes(significant, functional_score)) + geom_violin(aes(fill = significant), alpha = 0.75, color = NA) + geom_boxplot(outlier.shape = NA, width = 0.25, alpha = 0.5) + ggsignif::geom_signif(comparisons = list(c("TRUE", "FALSE")), textsize = anno_size, size = 0.25) + geom_text(stat = "count", aes(label = ..count..), y = 0.01, size = anno_size) + scale_fill_manual(values = c("gray10", "gray70")) + facet_wrap(~ group) + labs(x = "found significantly de/stabilized", y = "functional score by Ochoa et al.") + coord_cartesian(ylim = c(-0.1, 1.1)) + theme_bw() + theme(legend.position = "none", strip.text = element_text(size = 8)) + theme_paper
# examples from this study p_1d <- plotDiffPhosphoMeltcurve( g_name = "LYN", p_seq = "_VIEDNEpYTAR_", nbf_df = nbf_peptide_filtered, phospho_df = phospho_peptide_tab_filtered, Gene_pSite = "LYN_pY397") + theme_paper p_1e <- plotDiffPhosphoMeltcurve( g_name = "LMNA", p_seq = "_SGAQASSpTPLpSPTR_", nbf_df = nbf_peptide_filtered, phospho_df = phospho_peptide_tab_filtered, Gene_pSite = "LMNA_pT19pS22") + theme_paper p_1f <- plotDiffPhosphoMeltcurve( g_name = "CARHSP1", p_seq = "_GNVVPpSPLPTRR_", nbf_df = nbf_peptide_filtered, phospho_df = phospho_peptide_tab_filtered, Gene_pSite = "CARHSP1_pS41") + theme_paper
# assemble full figure cowplot::plot_grid( cowplot::plot_grid(p_1a, p_1b, p_1c, labels = c("a", "b", "c"), ncol = 3, rel_widths = c(0.3, 0.3, 0.3)), cowplot::plot_grid(p_1d, p_1e, p_1f, labels = c("d", "e", "f"), ncol = 3, rel_widths = c(0.3, 0.3, 0.3)), ncol = 1 ) #ggsave(filename = "main_figure.pdf", width = 18, height = 12, units = "cm")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.