rmzTab-m: R Package Client for mzTab-M Reference Implementation and Validation API

Documented in extractComments extractMetadata extractSmallMoleculeEvidence extractSmallMoleculeFeatures extractSmallMoleculeSummary readMzTab readMzTabString

#' Read mzTab-M file into data frame.
#' @description Read an mzTab tab separated file from the passed in file.
#' @param filename the mzTab file to parse.
#' @export
readMzTab <- function(filename) {
  # read maximum number of columns in file
  ncol <- max(stats::na.omit(utils::count.fields(file=filename, sep = "\t")))
  if (is.infinite(ncol)) {
    stop(paste("Number of columns is infinite.", paste0("Please check file: ", filename)))
  }
  mztab.table = utils::read.table(file=filename, header=FALSE,
                    row.names=NULL, dec = ".", fill = TRUE,
                    col.names = paste0("V", seq_len(ncol)),
                    sep="\t", na.strings="null", quote = "", comment.char = "#",
                    blank.lines.skip = TRUE, strip.white = TRUE,
                    colClasses = "character")
  mztab.table
}

#' Read an mzTab-M string into a data frame.
#' @description Read an mzTab tab separated file from the passed in string.
#' This file creates a temporary file on disk to check for the maximum number of columns.
#' The returned object is a data.frame as returned by utils::read.table. 
#' 
#' @param mzTabString the mzTab string to parse.
#' @export
readMzTabString <- function(mzTabString) {
  file <- tempfile(fileext=".mzTab.tmp")
  base::write(mzTabString, file=file)
  # read maximum number of columns in file
  ncol <- max(stats::na.omit(utils::count.fields(file=file, sep = "\t")))
  mztab.table = utils::read.table(file=file, header=FALSE,
                                  row.names=NULL, dec = ".", fill = TRUE,
                                  col.names = paste0("V", seq_len(ncol)),
                                  sep="\t", na.strings="null", quote = "", comment.char = "#",
                                  blank.lines.skip = TRUE, strip.white = TRUE,
                                  colClasses = "character")
  mztab.table
}

extractPart <- function(mztab.table, matchColumn, matchValue) {
  mztab.table[startsWith(as.character(mztab.table[,matchColumn]), matchValue),]
}

extractParameter <- function(mztab.table, matchColumn, matchValue, valueColumn = "V3") {
  subTable <- extractPart(mztab.table, matchColumn, matchValue)
  stringValue <- asCharacter(subTable, 1, "V3")
  if(!is.null(stringValue)) {
    param <- Parameter$new()
    param$fromString(NULL, stringValue)
    return(param)
  }
  return(NULL)
}

idRegexp <- "\\[([0-9]+)\\]"

idElementRegexp <- "[a-z\\-_]+\\[([0-9]+)\\].*"

extractIds <- function(mtd.sub.table, column, regexp=idRegexp) {
  sapply(mtd.sub.table[, column], FUN = function(x) {
    regmatches(as.character(x), gregexpr(regexp, as.character(x), perl = TRUE))
  })
}

extractId <- function(idElement, regexp=idRegexp) {
  as.numeric(gsub(idElementRegexp, "\\1", as.character(idElement)))
}
# returns a list of elements / wide data frames for each id in the format that jsonlite::fromJSON returns.
# mapEmptyKeyTo can map either to name, param or parameter
extractIdElements <- function(mtd.sub.table, typePrefix, mapEmptyKeyTo="name") {
  #browser()
  databaseRows <- mtd.sub.table[startsWith(as.character(mtd.sub.table$V2), typePrefix),]
  databaseRows$id <- as.numeric(gsub(idElementRegexp, "\\1", databaseRows$V2))
  uniqueIds <- unique(databaseRows$id)
  dbSubTables <- lapply(uniqueIds, function(x, df) {
    # work on subset of data
    subset <- df[df$id==x,c("V2", "V3")]
    # replace typePrefix
    subset$V2 <- gsub(paste0(typePrefix,"[",x,"]"), replacement="", subset$V2, fixed=TRUE)
    # replace remaining "-"
    subset$V2 <- gsub("-", replacement="", subset$V2, fixed=TRUE)
    if (mapEmptyKeyTo=="name") {
      # implicit mapping to name property
      subset$V2[subset$V2==""] <- "name"
    } else if(mapEmptyKeyTo=="param") {
      # implicit mapping to param, e.g. for database
      subset$V2[subset$V2==""] <- "param"
    } else if(mapEmptyKeyTo=="parameter") {
      # implicit mapping to param, e.g. for database
      subset$V2[subset$V2==""] <- "parameter"
    } else {
      warning(paste("Unsupported mapEmptyKeyTo property:", mapEmptyKeyTo, "Supported are 'name', 'param' and 'parameter'"))
    }
    # pivot table from long to wide format (which is the one JSONLITE expects)
    subsetWide <- tidyr::pivot_wider(subset, names_from="V2", values_from="V3")
    # add id as column
    subsetWide$id <- as.numeric(x)
    subsetWide
  }, df = databaseRows)
  dbSubTables
}

extractUnique <- function(mtd.sub.table, column, regexp) {
  unique(regmatches(as.character(mtd.sub.table$V2), gregexpr(regexp, as.character(mtd.sub.table$V2), perl = TRUE)))
}

asCharacter <- function(mtd.sub.table, row, column) {
  as.character(mtd.sub.table[row, column])
}

# mtdIdElements <- c(
#   "sample_processing",
#   "instrument",
#   "software",
#   "publication",
#   "uri",
#   "external_study_uri",
#   "quantification_method",
#   "study_variable",
#   "ms_run",
#   "assay",
#   "sample",
#   "custom",
#   "cv",
#   "database",
#   "derivatization_agent"
# )

# mtdRegexps <- as.vector(paste0(mtdIdElements, idRegexp))
# names(mtdRegexps) <- mtdIdElements
# 
# 
# 
# buildMtdVersion <- function(mztab.mtd.table) {
#   mztab.table[startsWith(as.character(mztab.table$V2), "mzTab-version"),]
# }
# 
# buildMtdId <- function(mztab.mtd.table) {
#   
# }

#' Extract metadata from data frame.
#' @description Extract the \link{Metadata} from an mztab data frame.
#' @param MzTabDataFrame the mztab data frame
#' @export
extractMetadata = function(MzTabDataFrame) {
  prefix <- "MTD"
  mtd.table <- MzTabDataFrame[startsWith(as.character(MzTabDataFrame$V1), prefix), c("V1","V2","V3")]
  mtd.table
}

extractTable = function(mztab.table, headerPrefix, contentPrefix) {
  headers <- mztab.table[startsWith(as.character(mztab.table$V1), headerPrefix),]
  contents <- mztab.table[startsWith(as.character(mztab.table$V1), contentPrefix),]
  if (nrow(contents) >0) {
    df <- data.frame(contents)
    colnames(df) <- as.character(headers[1,])
    df[, colnames(df) != ""] ## remove empty (e.g. trailing) columns
  } else if (nrow(contents) == 0 & nrow(headers==1)) {
    warning("Empty ",contentPrefix, " section")
    df <- data.frame(headers[NULL,])
    colnames(df) <- as.character(headers[1,])
    df[, colnames(df) != ""] ## remove empty (e.g. trailing) columns
  } else {
    warning("No ",contentPrefix, " section")
    df <- NULL ## Maybe empty dataframe with mandatory columns only ?!
  }
}

#' Extract the SmallMoleculeSummary from data frame.
#' @description Extract the \link{SmallMoleculeSummary} from an mztab data frame.
#' @param mztab.table the mztab data frame
#' @export
extractSmallMoleculeSummary <- function(mztab.table) {
  sml <- extractTable(mztab.table, "SMH", "SML")
  rownames(sml) <- sml[,"SML_ID"]
  sml
}

#' Extract the SmallMoleculeFeatures from data frame.
#' @description Extract the \link{SmallMoleculeFeature} from an mztab data frame.
#' @param mztab.table the mztab data frame
#' @export
extractSmallMoleculeFeatures <- function(mztab.table) {
  smf <- extractTable(mztab.table, "SFH", "SMF")
  rownames(smf) <- smf[,"SMF_ID"]
  smf
}

#' Extract the SmallMoleculeEvidence from data frame.
#' @description Extract the \link{SmallMoleculeEvidence} from an mztab data frame.
#' @param mztab.table the mztab data frame
#' @export
extractSmallMoleculeEvidence <- function(mztab.table) {
  sme <- extractTable(mztab.table, "SEH", "SME")
  rownames(sme) <- sme[,"SME_ID"]
  sme
}

#' Extract the Comment from data frame.
#' @description Extract the \link{Comment}s from an mztab data frame.
#' @param mztab.table the mztab data frame
#' @export
extractComments <- function(mztab.table) {
  prefix <- "COM"
  com.table <- mztab.table[startsWith(as.character(mztab.table$V1), prefix), c("V1", "V2")]
  colnames(com.table) <- c("prefix", "msg")
  com.table$line_number <- rownames(com.table)
  com.table
}

splitList <- function(listString, separator="|") {
  paramFields <- trimws(unlist(strsplit(listString, separator, fixed=TRUE)), which = "both")
}

lifs-tools/rmzTab-m documentation built on Jan. 26, 2023, 4:45 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

lifs-tools/rmzTab-m
R Package Client for mzTab-M Reference Implementation and Validation API

R/read_mz_tab.R
In lifs-tools/rmzTab-m: R Package Client for mzTab-M Reference Implementation and Validation API

Defines functions splitList extractComments extractSmallMoleculeEvidence extractSmallMoleculeFeatures extractSmallMoleculeSummary extractTable extractMetadata asCharacter extractUnique extractIdElements extractId extractIds extractParameter extractPart readMzTabString readMzTab

Documented in extractComments extractMetadata extractSmallMoleculeEvidence extractSmallMoleculeFeatures extractSmallMoleculeSummary readMzTab readMzTabString

R Package Documentation

Browse R Packages

We want your feedback!

lifs-tools/rmzTab-m R Package Client for mzTab-M Reference Implementation and Validation API

R/read_mz_tab.R In lifs-tools/rmzTab-m: R Package Client for mzTab-M Reference Implementation and Validation API

Defines functions splitList extractComments extractSmallMoleculeEvidence extractSmallMoleculeFeatures extractSmallMoleculeSummary extractTable extractMetadata asCharacter extractUnique extractIdElements extractId extractIds extractParameter extractPart readMzTabString readMzTab

Documented in extractComments extractMetadata extractSmallMoleculeEvidence extractSmallMoleculeFeatures extractSmallMoleculeSummary readMzTab readMzTabString

R Package Documentation

Browse R Packages

We want your feedback!

lifs-tools/rmzTab-m
R Package Client for mzTab-M Reference Implementation and Validation API

R/read_mz_tab.R
In lifs-tools/rmzTab-m: R Package Client for mzTab-M Reference Implementation and Validation API