#' Impute minimum values for AUC XICs for each accession in both test and control runs.
#'
#' @param pd_df A dataframe generated by `format_pd_file()`.
#' @return A dataframe with imputed values for AUC XICs.
impute_fxn <- function(pd_df){
print(pd_df)
pd_impute_df <- pd_df %>%
dplyr::mutate(comb_ctrl = rowSums(pd_df[, grep("^ctrl\\d.*", names(pd_df))])) %>%
dplyr::mutate(comb_exp = rowSums(pd_df[, grep("^exp\\d.*", names(pd_df))])) %>%
dplyr::mutate(min_col = pmin(comb_ctrl, comb_exp, na.rm=TRUE))
pd_impute_df[c("comb_ctrl","comb_exp")][is.na(pd_impute_df[c("comb_ctrl","comb_exp")])] = min(pd_impute_df$min_col, na.rm = TRUE)
return(pd_impute_df)
}
#' Compute fold-change values for XICs and PSMs & Z-scores for PSMs for each accession in both test and control runs.
#'
#' @param pd_impute_df A dataframe generated by `impute_fxn()`.
#' @param one_sided Specify one-sided or two-sided statistical test; default = `FALSE` (two-sided).
#' @return A dataframe with fold-change values for XICs and PSMs & Z-scores for PSMs for each accession in both test and control runs.
compute_fxn <- function(pd_impute_df, one_sided = FALSE){
pd_enriched <- pd_impute_df %>%
# XIC fold change calcuations
dplyr::mutate(mean_abundance = (comb_exp+comb_ctrl)/2) %>%
dplyr::mutate(abundance_fc = case_when(comb_ctrl > comb_exp ~ -comb_ctrl/comb_exp,
comb_ctrl < comb_exp ~ comb_exp/comb_ctrl)) %>%
dplyr::mutate(abundance_log2fc = log2((comb_exp/comb_ctrl))) %>%
# PSM fold change calculations
dplyr::mutate(expPSMs_pseudo = exp_PSMs + 1,
ctrlPSMs_pseudo = ctrl_PSMs + 1) %>%
dplyr::mutate(F0ctrl_pseudo = ctrlPSMs_pseudo / sum(ctrlPSMs_pseudo)) %>%
dplyr::mutate(F0exp_pseudo = expPSMs_pseudo / sum(expPSMs_pseudo)) %>%
dplyr::mutate(PSM_fc = case_when(F0ctrl_pseudo > F0exp_pseudo ~ -F0ctrl_pseudo/F0exp_pseudo,
F0ctrl_pseudo < F0exp_pseudo ~ F0exp_pseudo/F0ctrl_pseudo)) %>%
dplyr::mutate(PSM_log2fc = log2((F0exp_pseudo/F0ctrl_pseudo))) %>%
# PSM z-score calculations
dplyr::mutate(F0_ctrl = ctrl_PSMs / (sum(ctrl_PSMs))) %>%
dplyr::mutate(F0_exp = exp_PSMs / (sum(exp_PSMs))) %>%
dplyr::mutate(F1 = (ctrl_PSMs + exp_PSMs) / (sum(ctrl_PSMs) + sum(exp_PSMs))) %>%
dplyr::mutate(PSM_zscore =
(F0_exp - F0_ctrl)/
sqrt((
(F1 * (1-F1) / sum(exp_PSMs))) +
(F1 * (1-F1) / sum(exp_PSMs))))
# calculate probabilities
if(!one_sided){
pd_enriched <- pd_enriched %>%
dplyr::mutate(pval = pnorm(PSM_zscore)) %>%
dplyr::mutate(fdr_bh = p.adjust(pval, method = "BH", n = length(pval)))
} else {
pd_enriched <- pd_enriched %>%
dplyr::mutate(pval = pnorm(PSM_zscore, lower.tail = FALSE)) %>%
dplyr::mutate(fdr_bh = p.adjust(pval, method = "BH", n = length(pval)))
}
# format final dataframe
pd_enriched <- pd_enriched %>%
dplyr::arrange(desc(PSM_zscore)) %>%
dplyr::rename(total_PSMs = number_PSMs, ctrl_abundance = comb_ctrl,
exp_abundance = comb_exp) %>%
dplyr::select(accession, description, number_of_unique_peptides,
number_of_peptides, total_PSMs, ctrl_abundance,
exp_abundance, mean_abundance, abundance_fc,
abundance_log2fc, ctrl_PSMs, exp_PSMs,
PSM_fc, PSM_log2fc, PSM_zscore, pval, fdr_bh)
print(pd_enriched)
return(pd_enriched)
}
#' Annotate and compute fold-changes and Z-scores for differential proteomics results.
#'
#' Format PSM.txt and Proteins.txt files output by Proteome Discoverer 2.3, formatting PSM counts and AUC XIC values into one table, summing technical replicates, and computing fold-change and Z-scores for each detected protein in test and control runs. Optionally annotates results given a tab-separated annotation file. See README for file formats and example usage on the \href{https://github.com/rachaelcox/enrichr}{enrichr GitHub Repository}.
#'
#' @param exp_id An identifier for a given experiment that matches test and control files together (see meta file).
#' @param meta_file A tab-separate file with column headers `exp_name` (populated with values that correspond to exp_id variable), `type` (exp or ctrl), and `file_id` (determined by Proteome Discoverer, see README on Github).
#' @param psm_file PSM.txt tab-separated file output by Proteome Discoverer 2.3.
#' @param pd_file Proteins.txt tab-separated file output by Proteome Discoverer 2.3.
#' @param one_sided Specify one-sided or two-sided statistical test; default = `FALSE` (two-sided).
#' @param annot_file Optional tab-separated file containing a table with annotations for the queried proteome; must match accession format in FASTA given to Proteome Discoverer as a look up database.
#' @param outfile_name File name for output .csv; default = "output.csv".
#' @return A `.csv` with all computed information combined into one table.
#' @export
enrich <- function(exp_id, meta_file, psm_file, pd_file, one_sided = FALSE, annot_file, outfile_name = "output.csv"){
if(str_sub(outfile_name, -3, -1) != "csv"){
outfile_name = paste0(outfile_name, ".csv")
}
# load in and format Proteome Discoverer files, meta file
psm_df <- format_psm_file(exp_id, meta_file, psm_file)
pd_df <- format_pd_file(exp_id, meta_file, pd_file, psm_df)
# impute missing values
pd_impute_df <- impute_fxn(pd_df)
# sum technical replicates, calculate fold change and log ratios
pd_enriched <- compute_fxn(pd_impute_df, one_sided)
# if no XIC values detected, remove these columns
if(all(pd_enriched$mean_abundance == 0)){
pd_enriched <- pd_enriched %>%
dplyr::select(-matches(".*abundance.*"))
}
# join on annotations if specified
if(!missing(annot_file)){
annot_table <- readr::read_tsv(annot_file) %>%
janitor::clean_names()
pd_enriched <- pd_enriched %>%
dplyr::left_join(annot_table, by=c("accession"))
}
readr::write_csv(pd_enriched, outfile_name)
return(pd_enriched)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.