# R/datasets.R

# ========== MGFs ==========

#' Proteome Discoverer (PD) global TMT
#'
#' A small subset of MGF entries from the \code{BI_1} for demonstration with
#' \href{https://github.com/qzhang503/proteoM}{proteoM}. The MGF generator is
#' PD.
#'
#' @format An MGF file at a length of 999708 entries.
"pd_bi1_gl_partial"


#' MSConvert IMAC TMT
#'
#' A small subset of MGF entries from the \code{BI_1} for demonstration with
#' \href{https://github.com/qzhang503/proteoM}{proteoM}. The MGF generator is
#' MSConvert.
#'
#' @format An MGF file at a length of 800889 lines.
"msconv_bi1_imac_partial"


# ========== Mascot ==========

#' Mascot global TMT
#'
#' A \code{Mascot} PSM table from the \code{BI_1} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 39537
"F003590"


#' Mascot global TMT
#'
#' A \code{Mascot} PSM table from the \code{BI_2} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 38903
"F003591"


#' Mascot global TMT
#'
#' A \code{Mascot} PSM table from the \code{JHU_1} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 37240
"F003593"


#' Mascot global TMT
#'
#' A \code{Mascot} PSM table from the \code{JHU_2} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 37951
"F003594"


#' Mascot global TMT
#'
#' A \code{Mascot} PSM table from the \code{PNNL_1} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 41550
"F003595"


#' Mascot global TMT
#'
#' A \code{Mascot} PSM table from the \code{PNNL_2} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 42333
"F003597"


#' Mascot phosphopeptide TMT
#'
#' A \code{Mascot} PSM table from the \code{BI_1} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 12761
"F003598"


#' Mascot phosphopeptide TMT
#'
#' A \code{Mascot} PSM table from the \code{BI_2} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 12061
"F003602"


#' Mascot phosphopeptide TMT
#'
#' A \code{Mascot} PSM table from the \code{JHU_1} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 9801
"F003603"


#' Mascot phosphopeptide TMT
#'
#' A \code{Mascot} PSM table from the \code{JHU_2} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 10756
"F003604"


#' Mascot phosphopeptide TMT
#'
#' A \code{Mascot} PSM table from the \code{PNNL_1} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 13089
"F003605"


#' Mascot phosphopeptide TMT
#'
#' A \code{Mascot} PSM table from the \code{PNNL_2} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 11193
"F003606"


# (2) LFQ

#' Mascot IMAC data (LFQ)
#'
#' A \code{Mascot} LFQ PSM table from the 5th and 10th fractions of
#' \code{BI_1, BI_2, JHU_1, JHU_2, PNNL_1, PNNL_2} IMAC. The data were searched
#' against a "UniProt Hu and Mm" database.
#'
#' @format A PSM table at a length of 1297758
"F003997_lfq_p"






# ========== MaxQuant ==========

#' MaxQuant global TMT
#'
#' A \code{MaxQuant} PSM table from the \code{BI_1} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 28188
"msms_bi_1"


#' MaxQuant global TMT
#'
#' A \code{MaxQuant} PSM table from the \code{BI_2} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 27285
"msms_bi_2"


#' MaxQuant global TMT
#'
#' A \code{MaxQuant} PSM table from the \code{JHU_1} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 25552
"msms_jhu_1"


#' MaxQuant global TMT
#'
#' A \code{MaxQuant} PSM table from the \code{JHU_2} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 26330
"msms_jhu_2"


#' MaxQuant global TMT
#'
#' A \code{MaxQuant} PSM table from the \code{PNNL_1} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 28470
"msms_pnnl_1"


#' MaxQuant global TMT
#'
#' A \code{MaxQuant} PSM table from the \code{PNNL_2} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 28691
"msms_pnnl_2"


#' MaxQuant phosphopeptide TMT
#'
#' A \code{MaxQuant} PSM table from the \code{BI_1} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 10711
"msms_bi_p1"


#' MaxQuant phosphopeptide TMT
#'
#' A \code{MaxQuant} PSM table from the \code{BI_2} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 10031
"msms_bi_p2"


#' MaxQuant phosphopeptide TMT
#'
#' A \code{MaxQuant} PSM table from the \code{JHU_1} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 8132
"msms_jhu_p1"


#' MaxQuant phosphopeptide TMT
#'
#' A \code{MaxQuant} PSM table from the \code{JHU_2} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 8704
"msms_jhu_p2"


#' MaxQuant phosphopeptide TMT
#'
#' A \code{MaxQuant} PSM table from the \code{PNNL_1} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 11117
"msms_pnnl_p1"


#' MaxQuant phosphopeptide TMT
#'
#' A \code{MaxQuant} PSM table from the \code{PNNL_2} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 9926
"msms_pnnl_p2"


# (2) LFQ

#' MaxQuant IMAC data (LFQ)
#'
#' A \code{MaxQuant} LFQ PSM table from the 5th and 10th fractions of
#' \code{BI_1, BI_2, JHU_1, JHU_2, PNNL_1, PNNL_2} IMAC. The data were searched
#' against a "UniProt Hu and Mm" database. Some columns were excluded to reduce
#' the size of the file.
#'
#' @format A PSM table at a length of 90608
"msms_lfq_p"




# ========== proteoM ==========

#' proteoM global TMT
#'
#' A \code{proteoM} PSM table from the \code{BI_1} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 75856
"psmQ_tmt_bi_1"


#' proteoM global TMT
#'
#' A \code{proteoM} PSM table from the \code{BI_2} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 71859
"psmQ_tmt_bi_2"


#' proteoM global TMT
#'
#' A \code{proteoM} PSM table from the \code{JHU_1} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 63031
"psmQ_tmt_jhu_1"


#' proteoM global TMT
#'
#' A \code{proteoM} PSM table from the \code{JHU_2} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 59854
"psmQ_tmt_jhu_2"


#' proteoM global TMT
#'
#' A \code{proteoM} PSM table from the \code{PNNL_1} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 82347
"psmQ_tmt_pnnl_1"


#' proteoM global TMT
#'
#' A \code{proteoM} PSM table from the \code{PNNL_2} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table with header at a length of 82482
"psmQ_tmt_pnnl_2"


# (2) LFQ

#' proteoM IMAC data (LFQ)
#'
#' A \code{proteoM} LFQ PSM table from the 5th and 10th fractions of
#' \code{BI_1, BI_2, JHU_1, JHU_2, PNNL_1, PNNL_2} IMAC. The data were searched
#' against a "UniProt Hu and Mm" database. 
#' 
#' Redundancy at neutral losses and variable modifications was kept.
#'
#' @format A PSM table at a length of 308402
"psmQ_lfq_p"




# ========== MSFragger ==========

# (1) Global TMT

#' MSFragger global TMT
#'
#' A \code{MSFragger} TMT PSM table from the \code{BI_1} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table of 36843 rows and 44 columns
"psm_tmt_bi_1"


#' MSFragger global TMT
#'
#' A \code{MSFragger} TMT PSM table from the \code{BI_2} dataset. The PSM entries were
#' from offline fractions 10 and 15.
#'
#' @format A PSM table of 35854 rows and 44 columns
"psm_tmt_bi_2"


#' MSFragger global TMT
#'
#' A \code{MSFragger} TMT PSM table from the \code{JHU_1} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table of 35657 rows and 44 columns
"psm_tmt_jhu_1"


#' MSFragger global TMT
#'
#' A \code{MSFragger} TMT PSM table from the \code{JHU_2} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table of 36680 rows and 44 columns
"psm_tmt_jhu_2"


#' MSFragger global TMT
#'
#' A \code{MSFragger} TMT PSM table from the \code{PNNL_1} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table of 39244 rows and 44 columns
"psm_tmt_pnnl_1"


#' MSFragger global TMT
#'
#' A \code{MSFragger} TMT PSM table from the \code{PNNL_2} dataset. The PSM entries were
#' sampled at 10% from the full list.
#'
#' @format A PSM table of 40018 rows and 44 columns
"psm_tmt_pnnl_2"


# (2) LFQ

#' MSFragger IMAC data (LFQ)
#'
#' A \code{MSFragger} LFQ PSM table from the \code{BI_1} dataset. The PSM entries were
#' from offline fractions 5 and 10 and searched against UniProt.
#'
#' @format A PSM table of 24403 rows and 32 columns
"psm_lfq_bi_p1"


#' MSFragger IMAC data (LFQ)
#'
#' A \code{MSFragger} LFQ PSM table from the \code{BI_2} dataset. The PSM entries were
#' from offline fractions 5 and 10 and searched against UniProt.
#'
#' @format A PSM table of 22423 rows and 32 columns
"psm_lfq_bi_p2"


#' MSFragger IMAC data (LFQ)
#'
#' A \code{MSFragger} LFQ PSM table from the \code{JHU_1} dataset. The PSM entries were
#' from offline fractions 5 and 10 and searched against UniProt.
#'
#' @format A PSM table of 17242 rows and 32 columns
"psm_lfq_jhu_p1"


#' MSFragger IMAC data (LFQ)
#'
#' A \code{MSFragger} LFQ PSM table from the \code{JHU_2} dataset. The PSM entries were
#' from offline fractions 5 and 10 and searched against UniProt.
#'
#' @format A PSM table of 21984 rows and 32 columns
"psm_lfq_jhu_p2"


#' MSFragger IMAC data (LFQ)
#'
#' A \code{MSFragger} LFQ PSM table from the \code{PNNL_1} dataset. The PSM entries were
#' from offline fractions 5 and 10 and searched against UniProt.
#'
#' @format A PSM table of 26944 rows and 32 columns
"psm_lfq_pnnl_p1"


#' MSFragger IMAC data (LFQ)
#'
#' A \code{MSFragger} LFQ PSM table from the \code{PNNL_2} dataset. The PSM entries were
#' from offline fractions 5 and 10 and searched against UniProt.
#'
#' @format A PSM table of 24197 rows and 32 columns
"psm_lfq_pnnl_p2"







# ========== Others ==========

#' UniProt subcellular locations
#'
#' A table compiled from UniProt with columns "Entry" and "Subcellular location [CC]".
#'
#' @format A table of columns "uniprot_acc" and "scc_1", "scc_2" etc. at a
#'   length of 16697.
"scc_hs"


#' UniProt subcellular locations
#'
#' A table compiled from UniProt with columns "Entry" and "Subcellular location [CC]".
#'
#' @format A table of columns "uniprot_acc" and "scc_1", "scc_2" etc. at a
#'   length of 14540.
"scc_mm"

#' UniProt subcellular locations
#'
#' A table compiled from UniProt with columns "Entry" and "Subcellular location [CC]".
#'
#' @format A table of columns "uniprot_acc" and "scc_1", "scc_2" etc. at a
#'   length of 139561.
"scc_rn"
# qzhang503/proteoQDA documentation built on May 8, 2022, 5:22 a.m.