#' internal function that transforms/transposes a set of named lists into a
#' data.frame with the list names as column names
#'
#' @param xmlList the named set of lists (can be just a single one) to be
#' transformed. Usually (part of) the output of xmlToList from the
#' XML package. Please note that all lists should have the same element
#' names. If not the resulting data.frame will most likely have a weird
#' strucutre with (partially) NA filled columns
#' @param elementName allows for the selection of named elements of the list
#' object
#' @param leaveout default value is ".attrs", usually this is the closing
#' element of a set of lists coming from xmlToList
#'
#' @return data.frame
#'
#' @noRd
intoTable <- function(xmlList, elementName = NA, leaveOut = ".attrs"){
if (identical(elementName, NA)){
if (leaveOut %in% names(xmlList)){
# multi line xmlTable
numberofElements <- length(xmlList)-1
return(dplyr::bind_rows(
lapply(1:numberofElements,
function(x){
xmlList[[x]]
})))
} else {
# single line xmlTable
return(dplyr::bind_rows(xmlList))
}
} else {
return(dplyr::bind_rows(xmlList[names(xmlList) == elementName]))
}
}
#' function that gets the first element of the AnalysisDefinitionXML
#' column from the AnalysisDefinition table in a .pdResult file
#'
#' @param db database access 'handle' pointing to a .pdResult file
#' @return a named tree like list that contains the info like file names,
#' study factors, correction factors, etc etc
#'
#' @export
analysisDefinition <- function(db){
return(XML::xmlToList((
dbGetTable(db = db,
tablename = "AnalysisDefinition"))$AnalysisDefinitionXML[1]))
}
#' function that extracts the factors used in the study to generate the
#' .pdResult file. The result contains some internal info in the form
#' of columns named id (identifiers).
#'
#' @param analysisDef generated by the analysisDefinition function
#'
#' @return data.frame with the info
#'
#' @export
studyDefinitionFactors <- function(analysisDef){
if (length(analysisDef$StudyDefinition$Factors) == 0){
return(NA)
} else {
result <- dplyr::bind_rows(
lapply(
analysisDef$StudyDefinition$Factors, function(x){x[[".attrs"]]})) %>%
dplyr::select("Kind", "Name", "Description", "Id") %>%
dplyr::rename_with(tolower) %>%
dplyr::rename(type = "kind")
result$factors <- unname(lapply(analysisDef$StudyDefinition$Factors,
function(x){dplyr::bind_rows(x$FactorOptions) %>%
dplyr::select("Kind", "Value", "Id") %>%
dplyr::rename_with(tolower) %>%
dplyr::rename(type = "kind")}))
return(result)
}
}
#' function that extracts file information on the original .raw files
#' used to generate the .pdResult file. Information includes the
#' original file name, location & size. It also contains some internal info in
#' the form of columns named id (identifiers).
#'
#' @param analysisDef generated by the analysisDefinition function
#' @param splitFileSize boolean (default: TRUE), specifies if the FileSize
#' column should be split into the actual file size (still a character vector)
#' and the file size format
#' @param joinedTables boolean (default: TRUE), specifies if all info should be
#' put in a single data.frame. If FALSE it will generate a list of two
#' data.frame objects; this might be useful in some scenarios
#'
#' @return data.frame or list of two data.frame objects
#'
#' @export
studyDefinitionFileSets <- function(analysisDef,
splitFileSize = TRUE,
joinedTables = TRUE){
if (length(analysisDef$StudyDefinition$FileSets) == 0){
return(NA)
} else {
result <- list()
result[[1]] <- dplyr::bind_rows(lapply(analysisDef$StudyDefinition$FileSets, function(x){x[[".attrs"]]}))
result[[2]] <- dplyr::bind_rows(lapply(analysisDef$StudyDefinition$FileSets, function(x){x[["Files"]]$File}))
if (splitFileSize){
result[[2]] <- result[[2]] %>%
tidyr::separate(col = "FileSize", into = c("FileSize","FileSizeFormat"), sep = " ")
}
if (joinedTables){
return(dplyr::bind_cols(result))
} else {
return(result)
}
}
}
#' function that extracts quantification method information if a quantification
#' method was used to generate the .pdResult file
#'
#' @param analysisDef generated by the analysisDefinition function
#'
#' @return A list of two data.frame objects. The first one will contain the
#' name, description, etc. The second one will specify the names of the labels
#' used. The result will be NA in the case that no quantification method was
#' used.
#'
#' @export
studyDefinitionQuanMethods <- function(analysisDef){
if (length(analysisDef$StudyDefinition$QuanMethods) == 0){
return(NA)
} else {
result <- list()
result[[1]] <- dplyr::bind_rows(analysisDef$StudyDefinition$QuanMethods[["QuanMethod"]][[".attrs"]])
result[[2]] <- dplyr::bind_rows(analysisDef$StudyDefinition$QuanMethods[["QuanMethod"]][["QuanChannels"]])
return(result)
}
}
#' function that extracts sample information. The information seems to be a bit
#' redundant, as the info is also seen in other tables.
#'
#' @param analysisDef generated by the analysisDefinition function
#'
#' @return a data.frame
#'
#' @export
studyDefinitionSamples <- function(analysisDef){
return(dplyr::bind_rows(lapply(analysisDef$StudyDefinition$Samples,
function(x){x[[".attrs"]]})))
}
#' internal function used by \empf{studyDefinitionExtensions}. Extracts the
#' correction factors for the mass tags/reporter ions from the provided XML
#' object
#'
#' @param resultXML xmlobject containing the correction factor information
#'
#' @return a data.frame of the isoptic correction factors
#'
#' @noRd
getCorrections <- function(resultXML){
elements <- lapply(resultXML[["MethodPart"]],
function(x){
if (".attrs" %in% names(x)){
return(x[[".attrs"]])
} else {
return(NA)
}})
elements <- elements[!is.na(elements)]
methodParts <- resultXML[["MethodPart"]][which(names(elements) == "MethodPart")]
correctionTable <- data.frame()
for (counter in 1:length(methodParts)){
rowElements <- unlist(lapply(1:4, function(x){methodParts[[counter]][[x]][["text"]]}))
names(rowElements) <- unname(unlist(lapply(1:4, function(x){methodParts[[counter]][[x]][[".attrs"]]})))
correctionTable <- dplyr::bind_rows(correctionTable,rowElements)
}
correctionFactors <- list()
for (counter in 1:length(methodParts)){
correctionFactors[[counter]] <- data.frame()
for (counter2 in 1:(length(methodParts[[counter]])-1)){
rowElements <- lapply(1:(length(methodParts[[counter]][["MethodPart"]][[counter2]])-1),
function(z){methodParts[[counter]][["MethodPart"]][[counter2]][[z]][["text"]]})
names(rowElements) <- unname(unlist(lapply(1:(length(methodParts[[counter]][["MethodPart"]][[counter2]])-1), function(z){methodParts[[counter]][["MethodPart"]][[counter2]][[z]][[".attrs"]]})))
correctionFactors[[counter]] <- dplyr::bind_rows(correctionFactors[[counter]], dplyr::bind_rows(rowElements))
}
}
for (counter in 1:length(correctionFactors)){
namesCorrections <- correctionFactors[[counter]]$CorrectionFactorName
correctionFactors[[counter]] <- correctionFactors[[counter]]$Factor
names(correctionFactors[[counter]]) <- namesCorrections
}
result <- dplyr::bind_cols(correctionTable, dplyr::bind_rows(correctionFactors)) %>%
dplyr::select(-!!rlang::sym("TagID"),-rlang::sym("IsActive"))
colnames(result)[1:2] <- c("MassTag","ReporterIon")
return(result)
}
#' function that extracts information on isotope corrections (if available)
#'
#' @param analysisDef generated by the analysisDefinition function
#' @param correctXML can only have two different values: NA or a two element
#' character vector c("utf-16","utf-8"). During the research into the
#' method descriptions in the XML object it was noticed that the
#' XML::xmlToList gave an error Document labelled UTF-16 but has
#' UTF-8 content. This was solved by replacing the 'utf-16' string by 'utf-8'
#' string in the XML object. This may be a country specific issue, so the
#' function allows setting this parameter to NA will not do the replacement.
#'
#' @return NA or a list of two data.frame objects
#'
#' @export
studyDefinitionExtensions <- function(analysisDef, correctXML = c("utf-16","utf-8")){
if (length(analysisDef$StudyDefinition$StudyDefinitionExtensions[["StudyDefinitionExtension"]][["QuantitationMethods"]]) == 1){
return(NA)
} else {
result <- analysisDef$StudyDefinition$StudyDefinitionExtensions[["StudyDefinitionExtension"]][["QuantitationMethods"]][["QuantitationMethod"]][["text"]]
if (identical(correctXML, NA)){
return(XML::xmlToList(result))
} else {
resultXML <- XML::xmlToList(stringr::str_replace(result,
pattern = correctXML[1],
replacement = correctXML[2]))
result <- list()
result[[1]] <- dplyr::bind_rows(resultXML[[".attrs"]])
result[[2]] <- getCorrections(resultXML)
names(result) <- c("Method","CorrectionFactors" )
return(result)
}
}
}
#' internal function used by the quanRatio function to separate info from
#' a quanRatio numerator or denominator
#'
#' @param numOrdenom either a numerator or denominator element
#' @param numerator boolean: if TRUE then numOrdenom is a numerator, otherwise
#' it's a denominator
#'
#' @return a list of information to be further processed by quanratio
#'
#' @noRd
sampleGroupIn <- function(numOrdenom, numerator){
if (sum(
unlist(
lapply(names(numOrdenom),
function(x){
return(!(x %in% c("StudyVariablesForGrouping","Samples","Replicates",".attrs" )))}))) != 0){
warning("Possible unusual numerator/denominator element")
}
x <- list(numOrdenom$StudyVariablesForGrouping %>%
as.data.frame() %>%
dplyr::slice(-1) %>% # remove Version
dplyr::rename_at("StudyVariable",
~ifelse(numerator,
"Numerator",
"Denominator")), # rename column
numOrdenom$Samples,
numOrdenom$Replicates) # always seems to be NULL
names(x) <- names(numOrdenom)[1:3]
return(x)
}
#' internal function which extracts the (part of the) info on a numerator or
#' denominator from a quanRatio element
#'
#' @param sampleInfos numerator or denominator sample element from a quanRatio
#'
#' @noRd
sampleInfo <- function(sampleInfos){
result <- list()
result[[1]] <- dplyr::bind_rows(sampleInfos$StudyVariables)
result[[2]] <- dplyr::bind_rows(sampleInfos[[".attrs"]])
names(result) <- c("info1", "info2")
return(result)
}
#' internal function to extract info about the numerator or denominator samples
#' (puts together also info from sampleInfos function)
#' @param numOrDenom numerator or denominator sample element from a quanRatio
#'
#' @return data.frame with the info on the ratio numerator or denominator,
#' among other things it specifies the samples/files used
#'
#' @noRd
samplesInfo <- function(numOrDenom){
tempResult <- lapply(numOrDenom, sampleInfo)
result <- dplyr::bind_rows(lapply(tempResult, function(x){x$info2})) %>%
dplyr::select(-rlang::sym("Version")) %>%
dplyr::select("SampleName", tidyselect::everything())
for (counter in 1:length(tempResult)){
tempResult[[counter]]$info1 <- tempResult[[counter]]$info1 %>%
dplyr::select(-rlang::sym("Version"), -rlang::sym("Kind")) %>%
dplyr::rename_at("Value", ~paste(c("Value",1), collapse = "")) %>%
dplyr::mutate(Name = stringr::str_replace_all(!!rlang::sym("Name"), pattern = " ",
replacement = "_")) %>%
t() %>%
data.frame()
colnames(tempResult[[counter]]$info1) <- tempResult[[counter]]$info1[1,]
tempResult[[counter]]$info1 <- tempResult[[counter]]$info1 %>%
dplyr::slice(-1)
rownames(tempResult[[counter]]$info1) <- NULL
}
tempResult <- dplyr::bind_rows(lapply(tempResult, function(x){x$info1}))
result <- dplyr::bind_cols(result, tempResult)
return(result)
}
#' internal function used by quanRatio to extract info about the
#' replicates in a quanRatio
#'
#' @param replicateInfos coming from the quanRatiosElement parameter of the
#' quanRatio function
#'
#' @return a list of numerator & denominator data.frame objects
#'
#' @noRd
replicateInfo <- function(replicateInfos){
result <- list()
result[[1]] <- NA
result[[2]] <- NA
names(result) <- c("Numerator","Denominator")
if (!is.null(replicateInfos$SampleGroupInNumerator$Replicates)){
result[[1]] <- list()
result[[1]][[1]] <- dplyr::bind_rows(purrr::map(replicateInfos$SampleGroupInNumerator$Replicates,
~samplesInfo(.x$Samples)))
result[[1]][[2]] <- dplyr::bind_rows(purrr::map(replicateInfos$SampleGroupInNumerator$Replicates,
~dplyr::bind_rows(.x$ReplicateStudyVariable) %>%
dplyr::select(-Version)))
names(result[[1]]) <- c("Table","Factors")
}
if (!is.null(replicateInfos$SampleGroupInDenominator$Replicates)){
result[[2]] <- list()
result[[2]][[1]] <- dplyr::bind_rows(purrr::map(replicateInfos$SampleGroupInDenominator$Replicates,
~samplesInfo(.x$Samples)))
result[[2]][[2]] <- dplyr::bind_rows(purrr::map(replicateInfos$SampleGroupInDenominator$Replicates,
~dplyr::bind_rows(.x$ReplicateStudyVariable) %>%
dplyr::select(-Version)))
names(result[[2]]) <- c("Table","Factors")
}
return(result)
}
#' internal function used for the extraction of information from the individual
#' quanRatios
#'
#' @param quanRatiosElement one of the quanRatio coming from the quanRatios
#' element (see quanRatios)
#'
#' @return list of information in a single quanRatio
#'
#' @noRd
quanRatio <- function(quanRatiosElement){
if (sum(
unlist(
lapply(names(quanRatiosElement),
function(x){
return(!(x %in% c("SampleGroupInNumerator",
"SampleGroupInDenominator",
"ReplicateRatios",".attrs")))}))) != 0){
warning("Possible unusual quanRatio element")
}
x <- list(sampleGroupIn(quanRatiosElement$SampleGroupInNumerator$SampleGroup,
numerator = TRUE),
sampleGroupIn(quanRatiosElement$SampleGroupInDenominator$SampleGroup,
numerator = FALSE),
quanRatiosElement$ReplicaRatios) # not used, seems to always be NULL
names(x) <- names(quanRatiosElement)[1:3]
x[[length(x)+1]] <-
dplyr::bind_cols(x$SampleGroupInNumerator$StudyVariablesForGrouping,
x$SampleGroupInDenominator$StudyVariablesForGrouping)
names(x)[length(x)] <- "RatioTable"
x[[length(x)+1]] <- paste(c(x$RatioTable["Value","Numerator"],
"/",
x$RatioTable["Value","Denominator"]),
collapse = "")
names(x)[length(x)] <- "RatioString"
x[[length(x)+1]] <- samplesInfo(quanRatiosElement$SampleGroupInNumerator$SampleGroup$Samples)
names(x)[length(x)] <- "NumeratorSamples"
x[[length(x)+1]] <- samplesInfo(quanRatiosElement$SampleGroupInDenominator$SampleGroup$Samples)
names(x)[length(x)] <- "DenominatorSamples"
x[[length(x)+1]] <- replicateInfo(x)
names(x)[length(x)] <- "Replicates" # note: seems to always contain redundant
# info, that is to say: info that is
# already in rest of quanRatiosElement
return(x[-c(1,2,3)])
}
#' internal function for the extraction of the ratios for the quantification
#'
#' @param XMLData should be StudyAnalysisExtensionSettings/
#' AnalysisDefinitionExtensionSettings/ GroupingAndQuantificationExtension
#' part of the analysisDefinition generated by the analysisDefinition
#' function
#'
#' @return QuanRatios list for the third list item returned by
#' studyDefinitionExtensionSettings
#'
#' @noRd
quanRatios <- function(XMLData){
result <- lapply(XMLData$QuanRatios, quanRatio)
names(result) <- unname(purrr::map_chr(result,~.x$RatioString))
return(result)
}
#' function to extract sample/factor/ratio/replicate information.
#'
#' @param analysisDef generated by the analysisDefinition function
#'
#' @return a lits of 4 elements:
#' \enumerate{
#' \item StudyVariablesForGrouping : a data.frame of factors used
#' \item StudyVariablesForSorting : a data.frame of sorting specification for
#' the factors
#' \item QuanRatios : a list object of all ratios. Each ratio has the
#' following elements: RatioTable (specifying numerator/denominator),
#' RatioString (for easy info printing), NumeratorSamples &
#' DenominatorSamples specifying which samples are in the numerator and
#' denominator and finally Replicates which contains info on replicates.
#' \item XML : the actual from which the information comes. This was included
#' because the exact specification for all possible cases is not (yet) known
#' }
#'
#' @note So far, this function has not been tested for all possible cases/
#' scenarios.
#'
#' @export
studyDefinitionExtensionSettings <- function(analysisDef){
tempResult <- analysisDef$StudyAnalysisExtensionSettings$AnalysisDefinitionExtensionSettings$GroupingAndQuantificationExtension
if (is.null(tempResult$StudyVariablesForGrouping) & is.null(tempResult$StudyVariablesForSorting) & is.null(tempResult$QuanRatios)){
return(NA)
} else {
result <- list()
result[[1]] <- intoTable(tempResult$StudyVariablesForGrouping$StudyVariable)
result[[2]] <-
intoTable(tempResult$StudyVariablesForSorting$StudyVariableForSorting)
if (!purrr::is_empty(tempResult$QuanRatios)){
result[[3]] <- quanRatios(tempResult)
} else {
result[[3]] <- NA
}
result[[4]] <- tempResult
names(result) <- c(names(tempResult)[1:3],"XML")
return(result)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.