# The program ensures that spectra are nested within peptides, and these
# are nested within proteins, and all peptide and protein data are
# contiguous, that is, not split up into separate locations.
# This program should be run first on a comma-separated data file.
#
# The first column must contain prot name, and the second must be the peptide
# The columns after "numRefCols" contain relative abundance levels
# There should be "numDataCols" abundance level columns
# # # # # #
# Sort a protein/peptide table so that every protein's rows are contiguous and,
# within a protein, every peptide's rows are contiguous; then attach integer
# protein and peptide identifiers.
#
# Arguments:
#   protClass    data frame; column 1 is the protein name and column 2 the
#                peptide (both are renamed to "prot" and "peptide")
#   numRefCols   number of leading reference columns
#   numDataCols  number of abundance columns that follow the reference columns
#                (only the sum numRefCols + numDataCols is used here: columns
#                beyond that position are dropped)
#
# Returns a data frame holding the retained columns, sorted by protein and then
# peptide, with column 2 replaced by "PROT::peptide" and two extra integer
# columns: protId (one number per protein) and pepId (one number per
# protein/peptide pair), both numbered in order of first appearance.
proteinDataPrep <- function(protClass, numRefCols, numDataCols) {
  if (!is.data.frame(protClass)) {
    stop("protClass must be a data frame", call. = FALSE)
  }
  nKeep <- numRefCols + numDataCols
  if (nKeep < 2 || nKeep > ncol(protClass)) {
    stop("numRefCols + numDataCols must be between 2 and ncol(protClass)",
         call. = FALSE)
  }

  names(protClass)[1:2] <- c("prot", "peptide")

  # normalise protein names: upper case, no leading/trailing whitespace
  protClass$prot <- trimws(toupper(protClass$prot))

  # ensure that there are no extra columns after the data columns
  protClass <- protClass[, seq_len(nKeep), drop = FALSE]

  # Order by protein first and peptide second. Sorting on a pasted
  # "prot::peptide" key is not safe: how ":" collates is locale dependent, so
  # rows of one protein could be interleaved with those of another protein
  # whose name merely starts with the same characters.
  rowOrder <- order(protClass$prot, as.character(protClass$peptide))
  protClassSort <- protClass[rowOrder, , drop = FALSE]

  # protein-qualified peptide label, protecting against a peptide name that
  # occurs under more than one protein
  protPep <- paste(protClassSort$prot, protClassSort$peptide, sep = "::")

  # match() against the unique values numbers groups by first appearance and
  # stays correct even if equal keys were ever not adjacent
  protId <- match(protClassSort$prot, unique(protClassSort$prot))
  pepId <- match(protPep, unique(protPep))

  # replace plain peptide with protPep, in column 2
  protClassSort[, 2] <- protPep

  data.frame(protClassSort, protId, pepId)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.