# R/data.R
# In proBatch: Tools for Diagnostics and Corrections of Batch Effects in Proteomics

# NOTE(review): the classes listed below for RunDate ("difftime"/"hms") and
# RunTime ("POSIXct"/"POSIXt") look swapped -- "hms" is normally a time-of-day
# class. Confirm against the stored data before changing the item text.
#' Sample annotation data version 1
#'
#' This is data from the BXD mouse population aging study, with mock
#' instruments to show how instrument-specific functionality works.
#'
#' @format A data frame with 233 rows and 11 variables:
#' \describe{
#'   \item{FullRunName}{name of the file with the measurement for each sample, 
#'   referred to as \code{sample_id_col}}
#'   \item{MS_batch}{mass-spectrometry batch: 4-level factor of 
#'   manually annotated batches}
#'   \item{EarTag}{mouse ID, i.e. ID of the biological object. Only 14 mice
#'   have been replicated; one mouse was profiled 7 times.}
#'   \item{Strain}{mouse strain ID from BXD population set - 
#'   biological covariate #1, 51 strains represented}
#'   \item{Diet}{diet, biological covariate #2 - either 
#'   \code{HFD} = `High Fat Diet` or \code{CD} = `Chow Diet`}
#'   \item{Sex}{mouse sex - biological covariate #3}
#'   \item{RunDate}{mass-spectrometry running date. In combination 
#'   with \code{RunTime} used for running order determination. Vector of class 
#'   \code{"difftime"} and \code{"hms"}}
#'   \item{RunTime}{mass-spectrometry running time. In combination 
#'   with \code{RunDate} used for running order determination. Vector of class 
#'   \code{"POSIXct"} and \code{"POSIXt"}}
#'   \item{DateTime}{numeric date and time generated by 
#'   \code{date_to_sample_order}}
#'   \item{order}{order of samples generated by sorting DateTime 
#'   in \code{date_to_sample_order}}
#'   \item{digestion_batch}{peptide digestion batch: 4-level 
#'   factor of manually annotated batches}
#'   ...
#' }
"example_sample_annotation"

#' Example protein data in long format
#'
#' This is OpenSWATH-output data from the Aging study with all iRT and
#' spike-in peptides, plus a few representative peptides and proteins for
#' signal improvement demonstration. Using \code{long_to_matrix}, it can be
#' converted to \code{example_proteome_matrix}.
#'
#' @format A data frame with 124655 rows and 7 variables:
#' \describe{
#'   \item{peptide_group_label}{peptide ID, which is the regular feature
#'   level. This column is mostly used as \code{feature_id_col} and for
#'   merging with \code{example_peptide_annotation}}
#'   \item{Intensity}{peptide group intensity in given sample. 
#'   Used in function as \code{measure_col}}
#'   \item{Protein}{Protein group ID, specified as 
#'   N/UniProtID1|UniProtID2|...,
#'   where N is the number of proteins the peptide group maps to. If
#'   1/UniProtID, then this is a proteotypic peptide; in functions used as
#'   \code{protein_col}}
#'   \item{FullRunName}{name of the file, in most functions used for 
#'   \code{sample_id_col}}
#'   \item{m_score}{column marking the quality of peptide IDs, used as 
#'   \code{qual_col} throughout the script; when \code{qual_value} is 2 in this 
#'   column, peptide has been imputed (requantified)}
#' ...
#' }
#' @source PRIDE ID will be added upon the publication of the dataset
"example_proteome"

#' Example protein data in matrix format
#'
#' This is measurement data from the Aging study with columns
#' representing samples and rows representing peptides. Generated by
#' \code{long_to_matrix}.
#'
#' @format A matrix with 535 rows and 233 columns
#' 
#' @source PRIDE ID will be added upon the publication of the dataset
"example_proteome_matrix"

#' Peptide annotation data
#'
#' This is data from the Aging study, annotated with gene names.
#'
#' @format A data frame with 535 rows and 10 variables:
#' \describe{
#'   \item{peptide_group_label}{peptide group label ID, identical to 
#'   \code{peptide_group_label} in \code{example_proteome}}
#'   \item{Gene}{HUGO gene ID}
#'   \item{ProteinName}{protein group name as specified in 
#'   \code{example_proteome}}
#' }
"example_peptide_annotation"