## This file contains the functions that clean up the input data and calculate the features associated with peaks and peak groups.
#' Convert the exported skyline chromatogram and peak boundary reports to a dataframe format compatible with TargetedMSQC functions.
#'
#' The function takes the directories that contain chromatogram and peak boundary files of Skyline documents and merges them. The rows with NA values for peak boundaries, rows corresponding to peptides with fewer than 3 transitions and transitions missing a standard isotope pair are separated into a dataframe that is returned by the function in output$removed and excluded from subsequence QC steps. The remainder of the rows are returned by the function in output$data for subsequence analysis. Transition peaks of each peptide in each run are formed into peak groups of custom class peakObj and stored in the ChromGroup column. The ApplyPeakBoundary function is applied to chromatograms to restrict them to the provided peak boundaries and the results are stored in the PeakGroup column. Additionally, rows are added for the "sum" transition, which are is the sum of peptides transitions with the same isotope label in each sample.
#'
#' @param chromatogram.path Path to the directory containing the .tsv files of the peak chromatograms. For each Skyline document, this file is exported from Skyline through File > Export > Chromatograms. Here, check runs of interest and include Precursors, Products, Base Peaks and TICs. Each chromatogram .tsv file corresponds to a single Skyline document, which may contain any number of runs. Multiple chromatogram files, corresponding to multiple Skyline documents can be copied into the chromatogram.path directory. For each chromatogram file in this folder, there should be a peak boundary file with an identical name in peak.boundary.path directory.
#' @param peak.boundary.path Path to the directory containing the .csv files of the peak boundaries. For each Skyline document, this file is exported from Skyline through File > Export > Report. Here, select Peak Boundaries. Each peak boundary .csv file corresponds to a single Skyline document, which may contain any number of runs. Multiple peak boundary files, corresponding to multiple Skyline documents can be copied into the peak.boundary.path directory. For each peak boundary file in this folder, there should be a peak chromatogram file with an identical name in chromatogram.path directory.
#' @param labkey.url.base URL of the labkey server, if data is being imported from labkey (labkey = TRUE). This feature has not been implemented yet.
#' @param labkey.url.path Path to the directory containing the Skyline documents on the labkey server, if data is being imported from labkey (labkey = TRUE). This feature has not been implemented yet.
#' @param parallel Logical parameter to determine whether the function should run in parallel. This feature has not been implemented yet. Set parallel = FALSE.
#' @param labkey Logical parameter to indicate whether data will be imported from labkey. This feature has not been implemented yet. Set labkey = FALSE.
#' @param endogenous.label Label of the endogenous analyte: default is "light"
#' @param standard.label Label of the spiked-in isotopically labeled analyte: default is "heavy"
#' @param iRT.list List of iRT standards used in the experiment. These peptides will be removed from the training set.
#'
#' @return A list with the following objects:
#' data: A dataframe that contains all rows of the input data with assigned peak boundaries. This dataframe also includes columns for peakObj objects created for each peak group.
#' removed: A dataframe that contains the rows with missing peak boundary values, too few transitions and missing isotope pairs. These rows are removed from downstream feature extraction and QC analysis.
#'
#' @export
#'
#' @importFrom tidyr spread
#' @importFrom data.table setDT
#' @importFrom data.table rbindlist
#' @importFrom data.table rbindlist
#'
#' @examples
#'
#' extdata.path <- system.file("extdata",package = "TargetedMSQC")
#' project.folder.name <- "CSF_Panel"
#' project.path <- file.path(extdata.path,project.folder.name)
#' chromatogram.path <- file.path(project.path,"Chromatograms")
#' peak.boundary.path <- file.path(project.path,"Peak_boundary")
#' data <- CleanUpChromatograms(chromatogram.path = chromatogram.path,
#' peak.boundary.path = peak.boundary.path,
#' endogenous.label = "light",
#' standard.label = "heavy",
#' iRT.list = c("LGGNETQVR","AGGSSEPVTGLADK","VEATFGVDESANK","YILAGVESNK","TPVISGGPYYER","TPVITGAPYYER","GDLDAASYYAPVR","DAVTPADFSEWSK","TGFIIDPGGVIR","GTFIIDPAAIVR","FLLQFGAQGSPLFK"))
CleanUpChromatograms <- function(chromatogram.path = NULL, peak.boundary.path = NULL, labkey.url.base = NULL, labkey.url.path = NULL, parallel = FALSE, labkey = FALSE, endogenous.label = "light", standard.label = "heavy" , iRT.list = c("LGGNETQVR","AGGSSEPVTGLADK","VEATFGVDESANK","YILAGVESNK","TPVISGGPYYER","TPVITGAPYYER","GDLDAASYYAPVR","DAVTPADFSEWSK","TGFIIDPGGVIR","GTFIIDPAAIVR","FLLQFGAQGSPLFK","LGGNEQVTR","GAGSSEPVTGLDAK","VEATFGVDESNAK","YILAGVENSK","TPVISGGPYEYR","TPVITGAPYEYR","DGLDAASYYAPVR","ADVTPADFSEWSK","GTFIIDPGGVIR","GTFIIDPAAVIR","LFLQFGAQGSPFLK"), ...) {
# parallel = TRUE indicates that function should run in parallel. Currently, the code to use parallel is not implemened/tested and parallel is automatically set to FALSE.
if (parallel) {
warning('The code to handle parallel = TRUE has not been fully implemented and tested yet. parallel is set to FALSE.')
parallel = FALSE
}
# labkey = TRUE indicates that data is going to be read from labkey using the Rlabkey API. Currently, the code to read data from labkey has not been developed and labkey is automatically set to FALSE.
if (labkey) {
warning('The code to handle Labkey = TRUE has not been implemented yet. Labkey is set to FALSE.')
labkey = FALSE
}
if (!labkey) {
# error and warning handling ---------------------------------------
# input errors: if the input are empty
error.input.format1 = simpleError("CleanUpChromatograms: Input chromatogram file should not be empty")
error.input.format2 = simpleError("CleanUpChromatograms: Input peak boundary file should not be empty")
error.input.format3 = simpleError("CleanUpChromatograms: For each chromatograph file in chromatogram.path, there should be a peak boundary file in peak.boundary.path with an identical file name and vice versa")
# chromgram.path and peak.boundary.path can only be NULL if the code is being run on labkey.
if (is.null(chromatogram.path)) stop(error.input.format1)
if (is.null(peak.boundary.path)) stop(error.input.format2)
# check the names of files in chromatogram.path and peak.boundary.path
chromatogram.files <- sort(list.files(chromatogram.path,pattern = "\\.tsv"))
chromatogram.files <- unlist(lapply(chromatogram.files,function(x) substr(x,1,regexpr("\\.[^\\.]*$", x)[[1]] - 1)))
peak.boundary.files <- sort(list.files(peak.boundary.path,pattern = "\\.csv"))
peak.boundary.files <- unlist(lapply(peak.boundary.files,function(x) substr(x,1,regexpr("\\.[^\\.]*$", x)[[1]] - 1)))
if (!identical(chromatogram.files,peak.boundary.files)) stop(error.input.format3)
# read the chromatrogram files
chrom <- setDT(rbindlist(lapply(chromatogram.files,
function(chromatogram.file) {
chromatogram.file.path <- file.path(chromatogram.path,paste0(chromatogram.file,".tsv"))
tmp <- unique(read.table(chromatogram.file.path,sep = "\t", header = TRUE))
tmp$File <- chromatogram.file
return(tmp)
}
)
)
)
# convert isotopelabeltypes to light and heavy. Only these two values are acceptable:
chrom$IsotopeLabelType <- tolower(chrom$IsotopeLabelType)
chrom$IsotopeLabelType <- factor(chrom$IsotopeLabelType,levels = c(endogenous.label,standard.label))
# check if each FragmentIon/Peptide pair is represented by both endogenous and standard labels in each sample
tmp <- chrom %>%
select(File,FileName,PeptideModifiedSequence,PrecursorCharge,FragmentIon,ProductCharge,IsotopeLabelType, TotalArea) %>%
spread(IsotopeLabelType,TotalArea)
tmp <- as.data.frame(tmp)
tmp$RemoveIsotopePair <- FALSE
tmp[is.na(tmp[,endogenous.label]) | is.na(tmp[,standard.label]),"RemoveIsotopePair"] <- TRUE
chrom <- chrom %>%
left_join(tmp[,!colnames(tmp) %in% c(endogenous.label,standard.label,"<NA>")],by = c("File","FileName","PeptideModifiedSequence","PrecursorCharge","FragmentIon","ProductCharge"))
# mark the rows with any isotopelabeltype except endogenous. and standard.label are marked for removal
chrom[is.na(chrom$IsotopeLabelType),"RemoveIsotopePair"] <- TRUE
# read the peak boundary file
peak.boundary <- setDT(rbindlist(lapply(peak.boundary.files,
function(peak.boundary.file) {
peak.boundary.file.path <- file.path(peak.boundary.path,paste0(peak.boundary.file,".csv"))
tmp <- unique(read.csv(peak.boundary.file.path,sep = ",", header = TRUE))
tmp$File <- peak.boundary.file
return(tmp)
}
)
)
)
peak.boundary <- as.data.frame(peak.boundary)
if (nrow(chrom) == 0) stop(error.input.format1)
if (nrow(peak.boundary) == 0) stop(error.input.format2)
# change the column names of the peak boundary
colnames(peak.boundary) = gsub("\\.","",colnames(peak.boundary))
# change the column names of the chrom
colnames(chrom) = gsub("\\.","",colnames(chrom))
# change the class of time columns to numeric
numeric.cols = c("MinStartTime","MaxEndTime")
peak.boundary[,numeric.cols] = apply(peak.boundary[,numeric.cols],2,function(x) as.numeric(as.character(x)))
# mark the peaks with missing peak boundaries or the ones with multiple peak boundaries for removal. Note that sometimes skyline exports multiple lines for a peak, where one line is NA. In these cases, I keep the non-NA line and remove the NA line as redundant.
peak.boundary <- unique(peak.boundary)
peak.boundary$RemovePeakBoundary <- FALSE
peak.boundary$Redundant <- FALSE
peak.boundary <- peak.boundary %>% group_by(FileName,File,PeptideModifiedSequence) %>% mutate(n = n())
peak.boundary[(is.na(peak.boundary$MinStartTime) | is.na(peak.boundary$MaxEndTime)) & (peak.boundary$n > 1),"Redundant"] <- TRUE
peak.boundary <- peak.boundary[!peak.boundary$Redundant,] %>% select(-Redundant)
peak.boundary <- peak.boundary %>% group_by(FileName,File,PeptideModifiedSequence) %>% mutate(n = n())
peak.boundary[is.na(peak.boundary$MinStartTime) | is.na(peak.boundary$MaxEndTime) | (peak.boundary$n > 1),"RemovePeakBoundary"] <- TRUE
peak.boundary <- peak.boundary %>% select(-n)
# merge peak boundary and chromatrogram
data <- chrom %>% left_join(peak.boundary,by = c("File","FileName","PeptideModifiedSequence","PrecursorCharge"))
data$Remove <- data$RemoveIsotopePair | data$RemovePeakBoundary
}
# Rows where peak boundaries are NA are separated into a data frame and returned as output.
removed <- data %>%
filter(Remove | PeptideModifiedSequence %in% iRT.list) %>%
select(File,FileName,PeptideModifiedSequence,PrecursorCharge,FragmentIon,ProductCharge,IsotopeLabelType,MinStartTime,MaxEndTime,RemoveIsotopePair,RemovePeakBoundary,Remove)
data <- data %>% filter(!Remove & !(PeptideModifiedSequence %in% iRT.list)) %>%
select(FileName,PeptideModifiedSequence,PrecursorCharge,FragmentIon,ProductCharge,IsotopeLabelType,TotalArea,Times,Intensities,MinStartTime,MaxEndTime,File)
#
if (nrow(data) == 0) {
warnings("data is not appropriately formatted for TargetedMSQC. Please check the RemoveIsotopePair and RemovePeakBoundary columns in output$removed.")
} else {
# substitute NA values in the TotalArea column with 0.
if (sum(is.na(data$TotalArea)) > 0) {
warnings("NA values are detected in the peak area column of the input and replaced with 0")
data$TotalArea[is.na(data$TotalArea)] = 0
}
# build the chromatogram group for each (File,FileName,peptide,precursorcharge) trio
peak.groups = data %>%
group_by(File,FileName,PeptideModifiedSequence,PrecursorCharge) %>%
do(ChromGroup = BuildPeakGroup(.))
data = unique(merge(data,peak.groups))
# build the peak group for each (File,FileName,peptide,precursorcharge) trio: the only difference with the chromatogram group is that the peak boundaries are applied.
if (parallel) {
data$PeakGroup = mcmapply(ApplyPeakBoundary,data$ChromGroup,boundary = as.list(data.frame(t(cbind(data$MinStartTime,data$MaxEndTime)))),
mc.silent = FALSE, mc.cores = detectCores() - 1)
} else {
data$PeakGroup = mapply(ApplyPeakBoundary,data$ChromGroup,boundary = as.list(data.frame(t(cbind(data$MinStartTime,data$MaxEndTime)))))
}
# remove those rows where the peak is not a proper peak object. This can happen if the peak boundaries are too narrow and the peak has only 3 or fewer points across it.
removed.na.peaks <- data %>%
filter(is.na(PeakGroup)) %>%
select(File,FileName,PeptideModifiedSequence,PrecursorCharge,FragmentIon,ProductCharge,IsotopeLabelType,MinStartTime,MaxEndTime)
if (nrow(removed.na.peaks) > 0) {
removed.na.peaks$RemoveIsotopePair <- FALSE
removed.na.peaks$RemovePeakBoundary <- TRUE
removed.na.peaks$Remove <- TRUE
}
removed <- rbind(removed,removed.na.peaks)
data <- data %>% filter(!is.na(PeakGroup))
# add the peak area column to the data frame
data$PeakArea <- NULL
for (i in 1:nrow(data)) {
tmp <- paste(data$FragmentIon[[i]],data$ProductCharge[[i]],data$IsotopeLabelType[[i]],sep = ".")
data$PeakArea[i] <- data$PeakGroup[[i]]@area[[tmp]]
}
# calculate the sum of AUCs of individual transitions and assign to SumArea
data = data %>%
select(File,FileName,PeptideModifiedSequence,PrecursorCharge,FragmentIon,ProductCharge,IsotopeLabelType,PeakArea) %>%
group_by(PeptideModifiedSequence,PrecursorCharge,IsotopeLabelType, FileName,File) %>%
summarise(SumArea = sum(PeakArea)) %>%
right_join(data,by = c("PeptideModifiedSequence","PrecursorCharge", "IsotopeLabelType","FileName","File"))
#
# calculate sum of transitions with the FragmentIon name of "sum". Add the new "sum" transition rows to the dataframe
tmp = data %>%
select(File,FileName,PeptideModifiedSequence,PrecursorCharge,FragmentIon,ProductCharge,IsotopeLabelType,PeakGroup,MinStartTime,MaxEndTime) %>%
group_by(File,FileName,PeptideModifiedSequence,PrecursorCharge) %>%
summarise(Peak = PeakGroup[1],
MinStartTime = MinStartTime[1],
MaxEndTime = MaxEndTime[1])
if (parallel) {
tmp$PeakGroup = mcmapply(function(x,endogenous.label,standard.label) CalculateTransitionSum(x,endogenous.label,standard.label),
tmp$Peak,endogenous.label = endogenous.label,standard.label = standard.label,
mc.silent = FALSE, mc.cores = detectCores() - 1)
} else {
tmp$PeakGroup = mapply(function(x,endogenous.label,standard.label)
CalculateTransitionSum(x,endogenous.label,standard.label),
tmp$Peak,endogenous.label = endogenous.label,standard.label = standard.label)
}
tmp = tmp %>% select(-Peak)
# for "sum" transitions assign default values to appropriate columns
tmp = tmp %>%
mutate(FragmentIon = as.factor("sum")) %>%
mutate(ProductCharge = as.integer(0)) %>%
mutate(IsotopeLabelType = as.factor(endogenous.label)) %>%
mutate(Times = as.factor(NA)) %>%
mutate(Intensities = as.factor(NA)) %>%
mutate(ChromGroup = as.factor(NA))
# peak area for sum transitions is the sum of the PeakArea of all transitions. This value is calculated for endogenous and standard sums separately
if (parallel) {
tmp$PeakArea = parSapply(cl,tmp$PeakGroup, function(peak) {
area = peak@area
return(area[paste0("sum.0.",endogenous.label)])
})
} else {
tmp$PeakArea = sapply(tmp$PeakGroup, function(peak) {
area = peak@area
return(area[paste0("sum.0.",endogenous.label)])
})
}
# SumArea is the sum of indivodual transitions for each peak and sample and is equal to TotalAre of "sum" transitions
tmp$SumArea = tmp$PeakArea
# append the new rows to data frame
data <- data %>% select(-TotalArea)
tmp = data.frame(tmp[,colnames(data)])
data = rbind.data.frame(data,tmp)
# do the same for heavy sum isotopes. The only difference is in the IsotopeLabelType and PeakArea
tmp$IsotopeLabelType <- standard.label
if (parallel) {
tmp$PeakArea <- parSapply(cl,tmp$PeakGroup, function(peak) {
area <- peak@area
return(area[paste0("sum.0.",standard.label)])
})
} else {
tmp$PeakArea <- sapply(tmp$PeakGroup, function(peak) {
area = peak@area
return(area[paste0("sum.0.",standard.label)])
})
}
# SumArea is the sum of indivodual transitions for each peak and sample and is equal to TotalAre of "sum" transitions
tmp$SumArea <- tmp$PeakArea
# append the sum transitions for heavy isotopes
data <- rbind.data.frame(data,tmp) %>% ungroup()
data <- unique(data)
}
# deactivate the cluster
if (parallel) stopCluster(cl)
# return output
return(list(data = data,removed = removed))
}
#' Calculate an ensemble of QC features/metrics for a dataset that has been cleaned up by CleanUpChromatograms.
#'
#' The function takes the output of CleanUpChromatograms as input and for each transition pair of each peptide, calculates a number of QC metrics that are used as features for training a predictive peak QC model.
#'
#' @param data A dataframe that contains identifier columns and peak groups of class peakObj for each transition peak. This dataframe is the output of CleanUpChromatograms (output$data).
#' @param parallel Logical parameter to determine whether the function should run in parallel. This feature has not been implemented yet. Set parallel = FALSE.
#' @param blanks A dataframe with two columns of File and FileName. The FileName column contains the names of blank runs for each Skyline File in the File column. The values in File and FileName should be consistent with corresponding columns in input data. Example: If Skyline document "SkylineFile1" includes three blank runs of "Blank1", "Blank2" and "Blank3", blanks <- data.frame(File = "SkylineFile1", FileName = c("Blank1","Blank2","Blank3")). The function uses the blank runs to estimate the peak intensity at LLOQ for each pepide and transition. If blanks = NA, the function uses the intensity.threshold by default as approximation of intensity at LLOQ for all peaks. This threshold can be adjusted.
#' @param intensity.threshold The threshold that the function uses as an approximation of the intensity at LLOQ for all peptides. If blank samples are not provided to estimate the intensity at LLOQ, this value is used as an approximation to determine transition peaks that are below limit of quantitation.
#' @param endogenous.label Label of the endogenous analyte: default is "light"
#' @param standard.label Label of the spiked-in isotopically labeled analyte: default is "heavy"
#' @param export.features Logical parameter to indicate whether calculated features should be exported as a .csv file.
#' @param feature.path Path to the directory, where the features should be saved if export.features = TRUE.
#'
#'
#' @return A list with the following objects:
#' features: A dataframe with columns that contain the calculated QC features for each transition pair in the input data.
#'
#' @export
#'
#' @importFrom tidyr spread
#' @importFrom data.table dcast
#' @importFrom data.table setDT
#'
#' @examples
#'
#' data.features <- ExtractFeatures(data = data.CSF$data,
#' export.features = FALSE,
#' intensity.threshold = 1000)
ExtractFeatures <- function(data, parallel = FALSE, blanks = NA, intensity.threshold = 100000, endogenous.label = "light", standard.label = "heavy", export.features = FALSE, feature.path = "", ...) {
# parallel = TRUE indicates that function should run in parallel. Currently, the code to use parallel is not implemened/tested and parallel is automatically set to FALSE.
if (parallel) {
warning('The code to handle parallel = TRUE has not been fully implemented and tested yet. parallel is set to FALSE.')
parallel = FALSE
}
# if the dataset includes blank sample, use those to estimate the intensity at lloq by mean(intensity) + 10*sd(intensity) and mark what measurements are above the lloq level. If blanks are not available, just use the default value of 1e6 as your threshold. Based on looking at a dataset ~90% of peptides have an lloq.intesnity below 1e6 for individual fragment ions.
if (!is.na(blanks)) {
blank = data %>%
filter(interaction(File,FileName) %in% interaction(blanks$File,blanks$FileName)) %>%
group_by(File,PeptideModifiedSequence,PrecursorCharge,IsotopeLabelType,FragmentIon,ProductCharge) %>%
summarise(lloq.intensity = mean(PeakArea) + 10*sd(PeakArea))
data = data %>%
left_join(blank,by = c("File","PeptideModifiedSequence","PrecursorCharge","IsotopeLabelType","FragmentIon","ProductCharge"))
data$aboveThreshold = data$PeakArea > data$lloq.intensity
data = data %>% select(-lloq.intensity)
}
else {
data$aboveThreshold = data$PeakArea > intensity.threshold
}
if (parallel) {
# Calculate the number of cores
no_cores <- detectCores() - 1
# Initiate cluster
cl <- makeCluster(no_cores)
# export the unknown functions to individual processors
# clusterExport(cl,"corr.test")
}
if (parallel) {
# calculate jaggedness for each peak group
tmp = parSapply(cl,data$PeakGroup,CalculatePeakJaggedness)
data$PeakGroupJaggedness.r = tmp[1,]
data$PeakGroupJaggedness.m = as.numeric(tmp[2,])
# calculate symmetry for each peak group
tmp = parSapply(cl,data$PeakGroup,CalculatePeakSymmetry)
data$PeakGroupSymmetry.r = tmp[1,]
data$PeakGroupSymmetry.m = as.numeric(tmp[2,])
# export the unknown functions to individual processors
# clusterExport(cl,"corr.test")
# calculate shape similarity for each peak group
tmp = parSapply(cl,data$PeakGroup,CalculatePeakShapeSimilarity)
data$PeakGroupSimilarity.r = tmp[1,]
data$PeakGroupSimilarity.m = as.numeric(tmp[2,])
# export the unknown functions to individual processors
# clusterExport(cl,"CalculateElutionShift") # toghiess: if any error, uncomment this, if not remove the lie
# calculate the elution shift for each peak group
tmp = parSapply(cl,data$PeakGroup,CalculatePeakElutionShift)
data$PeakGroupShift.r = tmp
# calculate fwhm and fwhm2base ratio for each peak group
tmp = parSapply(cl,data$PeakGroup,CalculateFWHM)
data$PeakGroupFWHM.r = tmp[1,]
data$PeakGroupFWHM.m = as.numeric(tmp[2,])
data$PeakGroupFWHM2base.r = tmp[3,]
data$PeakGroupFWHM2base.m = as.numeric(tmp[4,])
# calculate modality ratio for each peak group
tmp = parSapply(cl,data$PeakGroup,CalculateModality)
data$PeakGroupModality.r = tmp[1,]
data$PeakGroupModality.m = as.numeric(tmp[2,])
# calculate max intensity for each transition
tmp = parSapply(cl,data$PeakGroup,CalculatePeakMaxIntensity)
data$TransitionMaxIntensity = mcmapply(function(r,trn) r[trn],
tmp,
trn = paste(data$FragmentIon,data$ProductCharge,data$IsotopeLabelType,sep = "."),
mc.silent = FALSE, mc.cores = detectCores() - 1)
# calculate max intensity at boundary for each transition
tmp = parSapply(cl,data$PeakGroup,CalculateMaxBoundaryIntensity)
data$TransitionMaxBoundaryIntensity = mcmapply(function(r,trn) r[trn],
tmp,
trn = paste(data$FragmentIon,data$ProductCharge,data$IsotopeLabelType,sep = "."),
mc.silent = FALSE, mc.cores = detectCores() - 1)
# extract jaggedness for each transition from the jaggedness.r matrix
data$TransitionJaggedness = mcmapply(function(r,trn) r[trn],
data$PeakGroupJaggedness.r,
trn = paste(data$FragmentIon,data$ProductCharge,data$IsotopeLabelType,sep = "."),
mc.silent = FALSE, mc.cores = detectCores() - 1)
# extract symmetry for each transition from the symmetry.r matrix
data$TransitionSymmetry = mcmapply(function(r,trn) r[trn],
data$PeakGroupSymmetry.r,
trn = paste(data$FragmentIon,data$ProductCharge,data$IsotopeLabelType,sep = "."),
mc.silent = FALSE, mc.cores = detectCores() - 1)
# extract shift for each transition from shift.r matrix using
data$TransitionShift = mcmapply(function(r,trn) r[trn,trn],
data$PeakGroupShift.r,
trn = paste(data$FragmentIon,data$ProductCharge,data$IsotopeLabelType,sep = "."),
mc.silent = FALSE, mc.cores = detectCores() - 1)
# extract similarity between light and heavy isotope for each transition from the similarity.r matrix
data$PairSimilarity = mcmapply(function(r,trn.endg,trn.std) r[trn.endg,trn.std],
data$PeakGroupSimilarity.r,
trn.endg = paste(data$FragmentIon,data$ProductCharge,endogenous.label,sep = "."),
trn.std = paste(data$FragmentIon,data$ProductCharge,standard.label,sep = "."),
mc.silent = FALSE, mc.cores = detectCores() - 1)
# extract elution shift between light and heavy isotope for each transition from the shift.r matrix
data$PairShift = mcmapply(function(r,trn.endg,trn.std) r[trn.endg,trn.std],
data$PeakGroupShift.r,
trn.endg = paste(data$FragmentIon,data$ProductCharge,endogenous.label,sep = "."),
trn.std = paste(data$FragmentIon,data$ProductCharge,standard.label,sep = "."),
mc.silent = FALSE, mc.cores = detectCores() - 1)
# extract fwhm2base ratio for each transition from the fwhm2base.r matrix
data$TransitionFWHM2base = mcmapply(function(r,trn) r[trn],
data$PeakGroupFWHM2base.r,
trn = paste(data$FragmentIon,data$ProductCharge,data$IsotopeLabelType,sep = "."),
mc.silent = FALSE, mc.cores = detectCores() - 1)
# extract fwhm for each transition from the fwhm.r matrix
data$TransitionFWHM = mcmapply(function(r,trn) r[trn],
data$PeakGroupFWHM.r,
trn = paste(data$FragmentIon,data$ProductCharge,data$IsotopeLabelType,sep = "."),
mc.silent = FALSE, mc.cores = detectCores() - 1)
# extract modality for each transition from the modality.r matrix
data$TransitionModality = mcmapply(function(r,trn) r[trn],
data$PeakGroupModality.r,
trn = paste(data$FragmentIon,data$ProductCharge,data$IsotopeLabelType,sep = "."),
mc.silent = FALSE, mc.cores = detectCores() - 1)
# extract average jaggedness for all light (heavy) transitions in each peak group from the jaggedness.r matrix
data$IsotopeJaggedness = mcmapply(function(r,isotope) round(mean(r[grep(isotope,names(r))]),digits = 4),
data$PeakGroupJaggedness.r,
isotope = data$IsotopeLabelType,
mc.silent = FALSE, mc.cores = detectCores() - 1)
# extract average symmetry for all light (heavy) transitions in each peak group from the symmetry.r matrix
data$IsotopeSymmetry = mcmapply(function(r,isotope) round(mean(r[grep(isotope,names(r))]),digits = 4),
data$PeakGroupSymmetry.r,
isotope = data$IsotopeLabelType,
mc.silent = FALSE, mc.cores = detectCores() - 1)
# extract average similarity for all light (heavy) transitions in each peak group from the similarity.r matrix
data$IsotopeSimilarity = mcmapply(function(r,isotope) round(mean(abs(r[grep(isotope,colnames(r)),grep(isotope,rownames(r))])),digits = 4),
data$PeakGroupSimilarity.r,
isotope = data$IsotopeLabelType,
mc.silent = FALSE, mc.cores = detectCores() - 1)
# extract average elution shift for all light (heavy) transitions in each peak group from the shift.r matrix
data$IsotopeShift = mcmapply(function(r,isotope) round(mean(abs(r[grep(isotope,colnames(r)),grep(isotope,rownames(r))])),digits = 4),
data$PeakGroupShift.r,
isotope = data$IsotopeLabelType,
mc.silent = FALSE, mc.cores = detectCores() - 1)
# extract average fwhm2base ratio for all light (heavy) transitions in each peak group from the fwhm2base.r matrix
data$IsotopeFWHM2base = mcmapply(function(r,isotope) round(mean(r[grep(isotope,names(r))]),digits = 4),
data$PeakGroupFWHM2base.r,
isotope = data$IsotopeLabelType,
mc.silent = FALSE, mc.cores = detectCores() - 1)
# extract average fwhm for all light (heavy) transitions in each peak group from the fwhm.r matrix
data$IsotopeFWHM = mcmapply(function(r,isotope) round(mean(r[grep(isotope,names(r))]),digits = 4),
data$PeakGroupFWHM.r,
isotope = data$IsotopeLabelType,
mc.silent = FALSE, mc.cores = detectCores() - 1)
# extract average modality for all light (heavy) transitions in each peak group from the modality.r matrix
data$IsotopeModality = mcmapply(function(r,isotope) round(mean(r[grep(isotope,names(r))]),digits = 4),
data$PeakGroupModality.r,
isotope = data$IsotopeLabelType,
mc.silent = FALSE, mc.cores = detectCores() - 1)
# extract average co-elution for all transitions in each peak group from the shift.r matrix
data$PeakGroupShift = parSapply(cl,data$PeakGroupShift.r, function(r) round(mean(diag(abs(r))),digits = 4))
}
else {
# calculate jaggedness for each peak group
tmp = sapply(data$PeakGroup,CalculatePeakJaggedness)
data$PeakGroupJaggedness.r = tmp[1,]
data$PeakGroupJaggedness.m = as.numeric(tmp[2,])
# calculate symmetry for each peak group
tmp = sapply(data$PeakGroup,CalculatePeakSymmetry)
data$PeakGroupSymmetry.r = tmp[1,]
data$PeakGroupSymmetry.m = as.numeric(tmp[2,])
# calculate shape similarity for each peak group
tmp = sapply(data$PeakGroup,CalculatePeakShapeSimilarity)
data$PeakGroupSimilarity.r = tmp[1,]
data$PeakGroupSimilarity.m = as.numeric(tmp[2,])
# calculate the elution shift for each peak group using the CalculatePeakElutionShift method
tmp = sapply(data$PeakGroup,CalculatePeakElutionShift)
data$PeakGroupShift.r = tmp
# calculate fwhm and fwhm2base ratio for each peak group
tmp = sapply(data$PeakGroup,CalculateFWHM)
data$PeakGroupFWHM.r = tmp[1,]
data$PeakGroupFWHM.m = as.numeric(tmp[2,])
data$PeakGroupFWHM2base.r = tmp[3,]
data$PeakGroupFWHM2base.m = as.numeric(tmp[4,])
# calculate modality for each peak group
tmp = sapply(data$PeakGroup,CalculateModality)
data$PeakGroupModality.r = tmp[1,]
data$PeakGroupModality.m = as.numeric(tmp[2,])
# calculate max intensity for each transition
tmp = sapply(data$PeakGroup,CalculatePeakMaxIntensity)
data$TransitionMaxIntensity = mapply(function(r,trn) r[trn],
tmp,
trn = paste(data$FragmentIon,data$ProductCharge,data$IsotopeLabelType,sep = "."))
# calculate max intensity at boundary for each transition
tmp = sapply(data$PeakGroup,CalculateMaxBoundaryIntensity)
data$TransitionMaxBoundaryIntensity = mapply(function(r,trn) r[trn],
tmp,
trn = paste(data$FragmentIon,data$ProductCharge,data$IsotopeLabelType,sep = "."))
# calculate max intensity at boundary normalized by max intensity for each transition
data$TransitionMaxBoundaryIntensityNormalized = data$TransitionMaxBoundaryIntensity/data$TransitionMaxIntensity
# impute TransitionMaxBoundaryIntensityNormalized values of NaN to 0. This happens transition is all zeros.
data$TransitionMaxBoundaryIntensityNormalized[!is.finite(data$TransitionMaxBoundaryIntensityNormalized)] <- 0
# extract jaggedness for each transition from the jaggedness.r matrix
data$TransitionJaggedness = mapply(function(r,trn) r[trn],
data$PeakGroupJaggedness.r,
trn = paste(data$FragmentIon,data$ProductCharge,data$IsotopeLabelType,sep = "."))
# extract symmetry for each transition from the symmetry.r matrix
data$TransitionSymmetry = mapply(function(r,trn) r[trn],
data$PeakGroupSymmetry.r,
trn = paste(data$FragmentIon,data$ProductCharge,data$IsotopeLabelType,sep = "."))
# extract shift for each transition from shift.r matrix using
data$TransitionShift = mapply(function(r,trn) r[trn,trn],
data$PeakGroupShift.r,
trn = paste(data$FragmentIon,data$ProductCharge,data$IsotopeLabelType,sep = "."))
# extract similarity between light and heavy isotope for each transition from the similarity.r matrix
data$PairSimilarity = mapply(function(r,trn.endg,trn.std) r[trn.endg,trn.std],
data$PeakGroupSimilarity.r,
trn.endg = paste(data$FragmentIon,data$ProductCharge,endogenous.label,sep = "."),
trn.std = paste(data$FragmentIon,data$ProductCharge,standard.label,sep = "."))
# extract elution shift between light and heavy isotope for each transition from the shift.r matrix
data$PairShift = mapply(function(r,trn.endg,trn.std) r[trn.endg,trn.std],
data$PeakGroupShift.r,
trn.endg = paste(data$FragmentIon,data$ProductCharge,endogenous.label,sep = "."),
trn.std = paste(data$FragmentIon,data$ProductCharge,standard.label,sep = "."))
# extract fwhm2base for each transition from the fwhm2base.r matrix
data$TransitionFWHM2base = mapply(function(r,trn) r[trn],
data$PeakGroupFWHM2base.r,
trn = paste(data$FragmentIon,data$ProductCharge,data$IsotopeLabelType,sep = "."))
# extract fwhm for each transition from the fwhm.r matrix
data$TransitionFWHM = mapply(function(r,trn) r[trn],
data$PeakGroupFWHM.r,
trn = paste(data$FragmentIon,data$ProductCharge,data$IsotopeLabelType,sep = "."))
# extract modality for each transition from the modality.r matrix
data$TransitionModality = mapply(function(r,trn) r[trn],
data$PeakGroupModality.r,
trn = paste(data$FragmentIon,data$ProductCharge,data$IsotopeLabelType,sep = "."))
# extract average jaggedness for all light (heavy) transitions in each peak group from the jaggedness.r matrix
data$IsotopeJaggedness = mapply(function(r,isotope) round(mean(r[grep(isotope,names(r))]),digits = 4),
data$PeakGroupJaggedness.r,
isotope = data$IsotopeLabelType)
# extract average symmetry for all light (heavy) transitions in each peak group from the symmetry.r matrix
data$IsotopeSymmetry = mapply(function(r,isotope) round(mean(r[grep(isotope,names(r))]),digits = 4),
data$PeakGroupSymmetry.r,
isotope = data$IsotopeLabelType)
# extract average similarity for all light (heavy) transitions in each peak group from the similarity.r matrix
data$IsotopeSimilarity = mapply(function(r,isotope) round(mean(abs(r[grep(isotope,colnames(r)),grep(isotope,rownames(r))])),digits = 4),
data$PeakGroupSimilarity.r,
isotope = data$IsotopeLabelType)
# extract average co-elution for all light (heavy) transitions in each peak group from the shift.r matrix
data$IsotopeShift = mapply(function(r,isotope) round(mean(diag(matrix(abs(r[grep(isotope,colnames(r)),grep(isotope,rownames(r))])))),digits = 4),
data$PeakGroupShift.r,
isotope = data$IsotopeLabelType)
# extract average fwhm2base for all light (heavy) transitions in each peak group from the fwhm2base.r matrix
data$IsotopeFWHM2base = mapply(function(r,isotope) round(mean(r[grep(isotope,names(r))]),digits = 4),
data$PeakGroupFWHM2base.r,
isotope = data$IsotopeLabelType)
# extract average fwhm for all light (heavy) transitions in each peak group from the fwhm.r matrix
data$IsotopeFWHM = mapply(function(r,isotope) round(mean(r[grep(isotope,names(r))]),digits = 4),
data$PeakGroupFWHM.r,
isotope = data$IsotopeLabelType)
# extract average modality for all light (heavy) transitions in each peak group from the modality.r matrix
data$IsotopeModality = mapply(function(r,isotope) round(mean(r[grep(isotope,names(r))]),digits = 4),
data$PeakGroupModality.r,
isotope = data$IsotopeLabelType)
# extract average co-elution for all transitions in each peak group from the shift.r matrix
data$PeakGroupShift = sapply(data$PeakGroupShift.r, function(r) round(mean(diag(abs(r))),digits = 4))
}
# calculate the center for each peak which is equivalent to RT for the peak
data$PeakCenter = (data$MaxEndTime + data$MinStartTime)/2
# calculate the ratio consistency between the endogenous and standard for each sample and peptide and tranistion:
# PairRatioConsistency = abs(endogenous - standard)/standard
# Area2SumRatio is the ratio of eahc transition to sum of all transitions in each sample for endogenous and standard isotopes
data = data %>% ungroup()
data = data %>%
mutate(Area2SumRatio = PeakArea/SumArea)
# impute Area2SumRaio values of NaN to 0. This happens when all the peaks are zero resulting in a SumArea of 0.
data$Area2SumRatio[!is.finite(data$Area2SumRatio)] <- 0
tmp = data %>%
select(PeptideModifiedSequence,PrecursorCharge,File,FileName,FragmentIon,ProductCharge,Area2SumRatio,IsotopeLabelType) %>%
spread(key = IsotopeLabelType,value = Area2SumRatio)
colnames(tmp)[grepl(endogenous.label,colnames(tmp))] = "endogenous"
colnames(tmp)[grepl(standard.label,colnames(tmp))] = "standard"
data = tmp %>%
mutate(PairRatioConsistency = abs(endogenous - standard)/standard) %>%
select(PeptideModifiedSequence,PrecursorCharge,File,FileName,FragmentIon,ProductCharge,PairRatioConsistency) %>%
right_join(data,by = c("PeptideModifiedSequence", "FragmentIon","PrecursorCharge","ProductCharge","FileName","File"))
# impute the values that are not finite (NA,NaN (0/0) and Inf (number/0)) to the maximum observed for PairRatioConsistency
data$PairRatioConsistency[!is.finite(data$PairRatioConsistency)] <- max(data$PairRatioConsistency[is.finite(data$PairRatioConsistency)],na.rm = TRUE)
# calculate the peak width consistency between the light and heavy for each sample and peptide and tranistion
# PairFWHMConsistency: abs(endogenous - standard)/standard
tmp = data %>%
select(PeptideModifiedSequence,PrecursorCharge,File,FileName,FragmentIon,ProductCharge,TransitionFWHM,IsotopeLabelType) %>%
spread(key = IsotopeLabelType,value = TransitionFWHM)
colnames(tmp)[grepl(endogenous.label,colnames(tmp))] = "endogenous"
colnames(tmp)[grepl(standard.label,colnames(tmp))] = "standard"
data = tmp %>%
mutate(PairFWHMConsistency = abs(endogenous - standard)/standard) %>%
select(PeptideModifiedSequence,PrecursorCharge,File,FileName,FragmentIon,ProductCharge,PairFWHMConsistency) %>%
right_join(data,by = c("PeptideModifiedSequence", "FragmentIon","PrecursorCharge","ProductCharge","FileName","File"))
# impute the values that are not finite (NA,NaN (0/0) and Inf (number/0)) to the maximum observed for PairFWHMConsistency
data$PairFWHMConsistency[!is.finite(data$PairFWHMConsistency)] <- max(data$PairFWHMConsistency[is.finite(data$PairFWHMConsistency)],na.rm = TRUE)
# calculate the area ratio consistency of each transition with average of area ratios in all samples
# MeanArea2SumRatio: mean of Area2SumRatio across all samples is calculated for each transition and isotope label.
# MeanIsotopeRatioConsistency: abs(Area2SumRatio - MeanArea2SumRatio)/MeanArea2SumRatio)
data = data %>%
select(PeptideModifiedSequence,PrecursorCharge,File,FileName,FragmentIon,ProductCharge,Area2SumRatio,IsotopeLabelType,aboveThreshold) %>%
filter(aboveThreshold) %>%
group_by(PeptideModifiedSequence,PrecursorCharge,File,FragmentIon,ProductCharge,IsotopeLabelType) %>%
summarise(MeanArea2SumRatio = mean(Area2SumRatio)) %>%
right_join(data,by = c("PeptideModifiedSequence","FragmentIon","PrecursorCharge","ProductCharge","IsotopeLabelType","File"))
data = data %>%
mutate(MeanIsotopeRatioConsistency = abs(Area2SumRatio - MeanArea2SumRatio)/MeanArea2SumRatio)
# impute the NA MeanIsotopeRatioConsistency values to max of this column. This happens when for a certain fragment ion of a peptide, nothing is detected above a certain threshold (either determined by intensity at lloq or chosen arbitrarily by the user). In these cases we just assume that the peak is too low to be used for determining the quality of the peak. Also impute values that are not finite (NA,NaN (0/0) and Inf (number/0))
data$MeanIsotopeRatioConsistency[!is.finite(data$MeanIsotopeRatioConsistency)] <- max(data$MeanIsotopeRatioConsistency[is.finite(data$MeanIsotopeRatioConsistency)],na.rm = TRUE)
# calculate the peak width consistency of each transition with average of peak widths in all samples
# MeanTransitionFWHM: mean of TransitionFWHM across all samples is calculated for each transition and isotope label.
# MeanIsotopeFWHMConsistency: abs(TransitionFWHM - MeanTransitionFWHM)/MeanTransitionFWHM)
data = data %>%
select(PeptideModifiedSequence,PrecursorCharge,File,FileName,FragmentIon,ProductCharge,TransitionFWHM,IsotopeLabelType,aboveThreshold) %>%
filter(aboveThreshold) %>%
group_by(PeptideModifiedSequence,PrecursorCharge,File,FragmentIon,ProductCharge,IsotopeLabelType) %>%
summarise(MeanTransitionFWHM = mean(TransitionFWHM)) %>%
right_join(data,by = c("PeptideModifiedSequence","FragmentIon","PrecursorCharge","ProductCharge","IsotopeLabelType","File"))
data = data %>%
mutate(MeanIsotopeFWHMConsistency = abs(TransitionFWHM - MeanTransitionFWHM)/MeanTransitionFWHM)
# impute the NA MeanIsotopeFWHMConsistency values to max of this column. This happens when for a certain fragment ion of a peptide, nothing is detected above a certain threshold (either determined by intensity at lloq or chosen arbitrarily by the user). In these cases we just assume that the peak is to low to be used for determining the quality of the peak. Also impute values that are not finite (NA,NaN (0/0) and Inf (number/0))
data$MeanIsotopeFWHMConsistency[!is.finite(data$MeanIsotopeFWHMConsistency)] <- max(data$MeanIsotopeFWHMConsistency[is.finite(data$MeanIsotopeFWHMConsistency)],na.rm = TRUE)
# calculate the peak center (RT) consistency of each transition with average of RT in all samples
# MeanPeakCenter: mean of PeakCenter across all samples is calculated for each transition and isotope label.
# MeanIsotopeRTConsistency: abs(PeakCenter - MeanPeakCenter)/MeanPeakCenter
data = data %>%
select(PeptideModifiedSequence,PrecursorCharge,File,FileName,FragmentIon,ProductCharge,PeakCenter,IsotopeLabelType,aboveThreshold) %>%
filter(aboveThreshold) %>%
group_by(PeptideModifiedSequence,PrecursorCharge,File,FragmentIon,ProductCharge,IsotopeLabelType) %>%
summarise(MeanPeakCenter = mean(PeakCenter)) %>%
right_join(data,by = c("PeptideModifiedSequence","FragmentIon","PrecursorCharge","ProductCharge","IsotopeLabelType","File"))
data = data %>%
mutate(MeanIsotopeRTConsistency = abs(PeakCenter - MeanPeakCenter)/MeanPeakCenter)
# impute the NA MeanIsotopeRTConsistency values to max of this column. This happens when for a certain fragment ion of a peptide, nothing is detected above a certain threshold (either determined by intensity at lloq or chosen arbitrarily by the user). In these cases we just assume that the peak is to low to be used for determining the quality of the peak. Also impute values that are not finite (NA,NaN (0/0) and Inf (number/0))
data$MeanIsotopeRTConsistency[!is.finite(data$MeanIsotopeRTConsistency)] <- max(data$MeanIsotopeRTConsistency[is.finite(data$MeanIsotopeRTConsistency)],na.rm = TRUE)
# calculate the correlation between the light and heavy Area2SumRatios
# the sum transitions are first filtered out, and the correlation coefficiant between the Area2SumRatio of light and heavy isotopes are calculated for each peptide and sample
# also calculate the absolute value of log of Area2SumRatio between endogenous and standard. This helps identify transitions that have interference or different area ratios between the isotope pairs
tmp = data %>%
filter(FragmentIon != "sum") %>%
select(File,FileName,PeptideModifiedSequence,PrecursorCharge,FragmentIon,ProductCharge,IsotopeLabelType,Area2SumRatio) %>%
spread(key = IsotopeLabelType,value = Area2SumRatio)
colnames(tmp)[grepl(endogenous.label,colnames(tmp))] = "endogenous"
colnames(tmp)[grepl(standard.label,colnames(tmp))] = "standard"
data = suppressWarnings(tmp %>%
group_by(PeptideModifiedSequence,PrecursorCharge, FileName,File) %>%
summarise(PeakGroupRatioCorr = cor(endogenous,standard,method = "pearson")) %>%
right_join(data,by = c("PeptideModifiedSequence", "PrecursorCharge","FileName","File")))
# impute NA PeakGroupRatioCorr values to 0. The NA values are a result of all zero signals or peptides with only one transition
data$PeakGroupRatioCorr[is.na(data$PeakGroupRatioCorr)] <- 0
# calculate the endogenous to standard ratio for each transition
tmp = data %>%
select(File,FileName,PeptideModifiedSequence,PrecursorCharge,FragmentIon,ProductCharge,IsotopeLabelType,PeakArea) %>%
spread(key = IsotopeLabelType,value = PeakArea)
colnames(tmp)[grepl(endogenous.label,colnames(tmp))] = "endogenous"
colnames(tmp)[grepl(standard.label,colnames(tmp))] = "standard"
data = tmp %>%
mutate(Endogenous2StandardRatio = endogenous/standard) %>%
select(File,FileName,PeptideModifiedSequence,PrecursorCharge,FragmentIon,ProductCharge,Endogenous2StandardRatio) %>%
right_join(data,by = c("File","FileName","PeptideModifiedSequence", "PrecursorCharge","FragmentIon","ProductCharge"))
# for each transition, calculate the CV% of the area to sum ration across all samples
data = data %>%
select(File,FileName,PeptideModifiedSequence,PrecursorCharge,FragmentIon,ProductCharge,IsotopeLabelType,Area2SumRatio) %>%
group_by(File,PeptideModifiedSequence,PrecursorCharge,FragmentIon,ProductCharge,IsotopeLabelType) %>%
mutate(Area2SumRatioCV = sd(Area2SumRatio,na.rm = TRUE)/mean(Area2SumRatio,na.rm = TRUE)) %>%
select(-Area2SumRatio) %>%
right_join(data,by = c("File","FileName","PeptideModifiedSequence", "PrecursorCharge","FragmentIon","ProductCharge","IsotopeLabelType"))
# deactivate the cluster
if (parallel) stopCluster(cl)
# extract the feature columns
features = data %>%
select(File,FileName,PeptideModifiedSequence,PrecursorCharge,FragmentIon,ProductCharge,IsotopeLabelType,Area2SumRatioCV,PeakGroupRatioCorr,PairFWHMConsistency,PairRatioConsistency,PeakGroupJaggedness.m,PeakGroupSymmetry.m,PeakGroupSimilarity.m,PeakGroupShift,PeakGroupFWHM.m,PeakGroupFWHM2base.m,PeakGroupModality.m,TransitionJaggedness,TransitionSymmetry,PairSimilarity,PairShift,TransitionFWHM2base,TransitionFWHM,TransitionModality,IsotopeJaggedness,IsotopeSymmetry,IsotopeSimilarity,IsotopeShift,IsotopeFWHM2base,IsotopeFWHM,IsotopeModality,MeanIsotopeRatioConsistency,MeanIsotopeFWHMConsistency,MeanIsotopeRTConsistency,TransitionMaxIntensity,TransitionMaxBoundaryIntensity,TransitionMaxBoundaryIntensityNormalized,TransitionShift)
# change the IsotopeLabelType to endogenous and standard
features$IsotopeLabelType <- as.character(features$IsotopeLabelType)
features$IsotopeLabelType[features$IsotopeLabelType == endogenous.label] = "endogenous"
features$IsotopeLabelType[features$IsotopeLabelType == standard.label] = "standard"
features$IsotopeLabelType <- factor(features$IsotopeLabelType,levels = c("endogenous","standard"))
# convert the feature.data to wide format
features.to.cast <- colnames(features)[!(colnames(features) %in% c("File","FileName","PeptideModifiedSequence","PrecursorCharge","FragmentIon","ProductCharge","IsotopeLabelType"))]
features <- dcast(setDT(features),File + FileName + PeptideModifiedSequence + PrecursorCharge + FragmentIon + ProductCharge ~ IsotopeLabelType,value.var = features.to.cast)
# removing duplicated columns (this happens for the features where the values of standard and endogenous are identical e.g. the peakgroup features)
features <- features[ ,which(!duplicated(t(features))),with = FALSE]
# if export.features is true save the features in the csv file specified by feature.path
if (export.features == TRUE) {
# template file name
feature.file <- file.path(feature.path,"features.csv")
# if the template path does not exist, create it
if (!dir.exists(feature.path)) {
dir.create(feature.path, showWarnings = TRUE, recursive = FALSE, mode = "0777")
}
write.table(features, file = feature.file, sep = ",",row.names = FALSE)
}
# return output
return(list(features = features))
}
#' Extract the peak from a chromatogram by restricting the chromatogram to peak boundaries.
#'
#' The function takes a peak group of class peakObj and the peak boundary as input. The input peak group is usually a chromatogram with a time range that is wider than the peak of interest. This function extracts peak of interest from the chromatogram by limiting the time and signal intensities of the peak to a range that is within the provided peak boundaries.
#'
#' @param peak A peak group of class peakObj
#' @param boundary a numeric vector of size two with min start time and max end time of the peak (peak boundaries)
#'
#' @return A peak group of class peakObj with time and intensity vectors that are within the boundaries of the peak
#'
#' @export
#'
#' @examples
#'
#' chrom <- data.CSF$data$ChromGroup[[1]]
#' PlotChromPeak(chrom)
#' peak <- ApplyPeakBoundary(chrom,c(data.CSF$data$MinStartTime[[1]],data.CSF$data$MaxEndTime[[1]]))
#' PlotChromPeak(peak)
ApplyPeakBoundary <- function(peak, boundary,...) {
# error and warning handling ---------------------------------------
# input errors: if the input peak is empty
error.input.format <- simpleError("ApplyPeakBoundary: input peak should be non-empty and of class peakObj")
if (is.na(peak)) stop(error.input.format)
# input errors: if the boundary is not a numeric vector of length 2
error.boundary.format <- simpleError("ApplyPeakBoundary: boudary should be a numeric vector of length 2")
if (length(boundary) != 2) stop(error.boundary.format)
# if the peak boundaries are NA
warning.na.boundary <- simpleWarning("ApplyPeakBoundary: the peak boundaries are NA")
if (sum(is.na(boundary)) > 0) {
warnings(warning.na.boundary)
# return the original peak as input
return(peak)
}
# function body ---------------------------------------
# determine the timepoints and the signal within the boundaries
# if there are only 3 time points within the peak boundary pad it with one additional datapoint. If there are fewer that 3, the peak is removed from qc analysis.
time.index <- which(peak@time > boundary[1] & peak@time < boundary[2])
if (length(time.index) == 3) {
if (time.index[1] > 1) time.index <- c(time.index[1] - 1,time.index)
else time.index <- c(time.index,tail(time.index,1) + 1)
}
time <- peak@time[time.index]
sig <- peak@sig %>%
slice(time.index)
# calculate peak area using the trapozoidal approximation
area <- sapply(sig,function(x) pracma::trapz(time,x))
# create a new peak with the time and sig data points within the boundaries
peak <- tryCatch({
peakObj(time = time, sig = sig, area = area)
}, error = function(e) {
return(NA)
}
)
return(peak)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.