library(tidyverse) library(stringr) library(readxl) library(knitr) library(broom) library(modelr) library(plotly) library(glue) knitr::opts_knit$set(root.dir = "..")
devtools::load_all(".") filepath <- file.path("tests", "testthat", "test_data", "svm_pred_results_0.03gr.csv") svm_data <- read_svm_data_file(filepath) metadata <- read_excel(file.path("test_scripts", "metadata_CMW.xlsx")) psms <- read_csv(file.path("tests", "testthat", "test_data", "psms.csv")) kable(metadata, d=2)
svm_data_filtered <- svm_data %>% # turnover function rename_proteins(file.path("tests", "testthat", "test_data", "rename_prot.xlsx")) %>% # turnover function filter_peptides_by_spectral_fit(pred_cutoff = 0.75) %>% # turnover function calculate_fraculab() %>% # adjust names (not a turnover function) select(unishort = uniShort, protein = prot, gene, isopep = seqz, sample, frac_ulab, frac_lab) %>% # turnover function #filter_min_timepoints(min_timepoint_present = 3) %>% # add metatdata to get times for samples - should this be a turnover function? left_join(metadata, by = "sample")
# look for data matches svm_data_proteins_only <- svm_data_filtered %>% select(unishort, protein) %>% unique() # could match also by str_c(protein, "_ECOLI") with similar results svm_data_with_uniprot_info <- svm_data_proteins_only %>% left_join(prots, by = c("unishort" = "uniprot_id"))
sector_list <- read_csv(file.path( "test_scripts", "Proteome_sectors_masterlist.csv"), col_types = cols( `Gene name` = col_character(), Sector = col_character())) %>% rename(gene = `Gene name`, sector = Sector) svm_data_with_sector_and_abundance <- svm_data_with_uniprot_info %>% # add sector left_join(sector_list, by = "gene") %>% # add abundance left_join(psms, by = "protein") # could look at these more to find the ones without sector info # View(svm_data_with_sector_and_abundance %>% filter(is.na(sector))) # could look at these more to find the ones that were not in T0 dataset # View(svm_data_with_sector_and_abundance %>% filter(is.na(T0_counts))) svm_data_with_sector_and_abundance %>% group_by(sector) %>% summarize(tally_counts = sum(T0_counts, na.rm = T)) %>% ungroup() %>% mutate( percent = tally_counts/sum(tally_counts) * 100 )
growth_rate = 0.46*60 /894.4 generation_time = log(2) / growth_rate final_data <- calculate_label_rate(svm_data_filtered, quiet = FALSE) %>% filter(enough_data, !fit_error) %>% select(-enough_data, -fit_error) %>% calculate_degrate_dissipation(growth_rate)
Note: resid. SE would look different if tp=0 were not included in the fit (i.e. assuming tp 0 would always be 0% labeled and any measurements that indicates not 0 tp would not be relevant)
plot_label_rate_hist(final_data) plot_label_rate_error(final_data) plot_labeling_curves(final_data, plot_number = 20)
final_table <-final_data %>% left_join(psms, by = "protein") %>% mutate( relative_abundance = (T0_counts / sum(T0_counts, na.rm = TRUE)), weighted_deg = deg_rate * relative_abundance ) %>% select(-nested_data, -fit) #filter data based on... no neg rates??
# we shoudl include something like this with the curves se_groups <- c(1, 2.5, 5, 7.5, 10, 15, 20, 25, 30) # plot data based on standard error to get a sense for cutoffs p <- svm_models %>% filter(!nls_error) %>% mutate( nls_resid_se = 100*map_dbl(nls_summary, ~.x$sigma), se_group = map_dbl(nls_resid_se, ~sum(.x > se_groups) + 1), se_group_name = map_dbl(se_group, ~se_groups[.x]) ) %>% arrange(se_group, desc(nls_resid_se)) %>% group_by(se_group) %>% mutate(se_group_nr = 1:n()) %>% ungroup() %>% # select up to 3 in each group filter(se_group_nr <= 3) %>% #filter(nls_resid_se > 15) %>% # NOTE: select random set of curves in different intervals of resid.SE # randomly select 9 curves arrange(nls_resid_se) %>% mutate(panel = forcats::as_factor(paste("resid. SE [% labeled] about", se_group_name))) %>% # calculate curves mutate( nls_fit_curve = map2(data, nls_fit, ~data_grid(.x, hours = seq_range(hours, 20)) %>% add_predictions(.y, var = "frac_lab")) ) %>% # plot ggplot() + aes(x = hours, y = frac_lab, color = protein, group = isopep, label = nls_resid_se) + geom_line(data = function(x) unnest(x, nls_fit_curve)) + geom_line(data = function(x) unnest(x, data), linetype = 2) + geom_point(data = function(x) unnest(x, data)) + facet_wrap(~panel) ggplotly(p)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.