#' Convert mzTab Data to mass_dataset Object
#'
#' @author Xiaotao Shen <shenxt1990@outlook.com>
#' @description This function converts mzTab data into a `mass_dataset` object.
#' It reads the file with `read_mztab()` and builds the expression data,
#' sample information and variable information from the MTD, SML and SMF
#' tables.
#'
#' @param file The name of the mzTab file to be read.
#' @param path The directory where the mzTab file is located. Default is the current directory.
#'
#' @return A `mass_dataset` object containing the processed mzTab data. The
#' list returned by `read_mztab()` is stored in the `other_files` slot.
#'
#' @examples
#' \dontrun{
#' # Assuming 'mztab_file' is the name of the mzTab file
#' mass_dataset <- convert_mztab2mass_dataset(file = mztab_file)
#' }
#'
#' @details
#' * Samples are taken from the MTD rows whose name contains a study
#'   variable id and `assay_refs`; the value is split on `|`.
#' * Expression data are the SML columns containing `abundance_assay`,
#'   reordered to match the sample information. Sample ids are rewritten
#'   from `assay[1]` to `assay_1`.
#' * `mz` and `rt` of each variable come from the first SMF row (in SMF table
#'   order) referenced by `SMF_ID_REFS`; they are `NA` when no SMF row matches.
#' * Warnings are suppressed while the function runs; the caller's `warn`
#'   option is restored on exit.
#' * The function stops if no sample can be derived from the metadata, if an
#'   assay has no abundance column, or if `check_mass_dataset()` reports an
#'   error.
#'
#' @export
convert_mztab2mass_dataset <-
  function(file,
           path = ".") {
    # Silence warnings for the duration of this call only, then restore the
    # caller's setting (previously the global option was left at -1).
    old_options <- options(warn = -1)
    on.exit(options(old_options), add = TRUE)

    data <- read_mztab(file = file, path = path)

    #### sample information
    mtd_table <- data$mtd_table

    # Study variable ids, e.g. "study_variable[1]": strip the "-<attribute>"
    # suffix from every MTD name mentioning a study variable.
    group_id <-
      mtd_table %>%
      dplyr::filter(stringr::str_detect(name, "study_variable")) %>%
      dplyr::pull(name) %>%
      stringr::str_replace_all("-.*", "") %>%
      unique()

    group <- mtd_table$value[match(group_id, mtd_table$name)]

    sample_info <-
      seq_along(group) %>%
      purrr::map(function(i) {
        assay_refs <-
          mtd_table %>%
          dplyr::filter(
            # literal match: the id contains "[" and "]"
            stringr::str_detect(name, stringr::fixed(group_id[i])),
            stringr::str_detect(name, "assay_refs")
          ) %>%
          dplyr::pull(value)
        # A study variable without assay references contributes no samples.
        if (length(assay_refs) == 0) {
          return(NULL)
        }
        temp_sample_id <-
          stringr::str_split(assay_refs[1], "\\|")[[1]] %>%
          stringr::str_trim()
        data.frame(
          group = group[i],
          group_id = group_id[i],
          sample_id = temp_sample_id
        )
      }) %>%
      dplyr::bind_rows()

    if (nrow(sample_info) == 0) {
      stop(
        "No study variable with 'assay_refs' found in the MTD section; ",
        "cannot derive sample information.",
        call. = FALSE
      )
    }

    #### expression data and variable information
    sml_table <- data$sml_table

    expression_data <-
      sml_table %>%
      dplyr::select(dplyr::contains("abundance_assay"))

    variable_info <-
      sml_table %>%
      dplyr::select(-dplyr::all_of(colnames(expression_data))) %>%
      dplyr::rename(variable_id = SML_ID) %>%
      dplyr::select(-SMH)

    # Per-study-variable summaries are not part of variable_info.
    expression_data_group <-
      variable_info %>%
      dplyr::select(dplyr::contains("abundance_study_variable"))

    variable_info <-
      variable_info %>%
      dplyr::select(-dplyr::all_of(colnames(expression_data_group)))

    group_cv <-
      variable_info %>%
      dplyr::select(dplyr::contains("abundance_variation_study_variable"))

    variable_info <-
      variable_info %>%
      dplyr::select(-dplyr::all_of(colnames(group_cv)))

    # "abundance_assay[1]" -> "assay[1]" so columns match the MTD assay ids.
    colnames(expression_data) <-
      stringr::str_replace(colnames(expression_data), "abundance_", "")

    missing_id <-
      setdiff(sample_info$sample_id, colnames(expression_data))
    if (length(missing_id) > 0) {
      stop(
        "No abundance column in the SML table for: ",
        paste(missing_id, collapse = ", "),
        call. = FALSE
      )
    }

    # drop = FALSE keeps a data frame when there is a single sample.
    expression_data <-
      expression_data[, sample_info$sample_id, drop = FALSE]
    rownames(expression_data) <- variable_info$variable_id

    # "assay[1]" -> "assay_1"
    colnames(expression_data) <-
      colnames(expression_data) %>%
      stringr::str_replace("\\[", "_") %>%
      stringr::str_replace("\\]", "")

    sample_info$sample_id <- colnames(expression_data)

    sample_info <-
      sample_info %>%
      dplyr::select(sample_id, dplyr::everything()) %>%
      dplyr::mutate(class = "Subject")

    #### mz and rt from the first referenced SMF row
    smf_table <- data$smf_table
    smf_refs <- variable_info$SMF_ID_REFS
    if (is.null(smf_refs)) {
      smf_refs <- rep(NA_character_, nrow(variable_info))
    }

    # One index per variable (NA when nothing matches), so mz/rt always line
    # up with variable_info row by row.
    smf_idx <-
      purrr::map_int(smf_refs, function(x) {
        ids <- stringr::str_trim(stringr::str_split(x, "\\|")[[1]])
        which(smf_table$SMF_ID %in% ids)[1]
      })

    variable_info <-
      variable_info %>%
      dplyr::mutate(
        mz = as.numeric(smf_table$exp_mass_to_charge[smf_idx]),
        rt = as.numeric(smf_table$retention_time_in_seconds[smf_idx])
      )

    sample_info_note <-
      data.frame(
        name = colnames(sample_info),
        meaning = colnames(sample_info),
        check.names = FALSE
      )

    variable_info_note <-
      data.frame(
        name = colnames(variable_info),
        meaning = colnames(variable_info),
        check.names = FALSE
      )

    check_result <-
      check_mass_dataset(
        expression_data = expression_data,
        sample_info = sample_info,
        variable_info = variable_info,
        sample_info_note = sample_info_note,
        variable_info_note = variable_info_note
      )

    if (stringr::str_detect(check_result, "error")) {
      stop(check_result)
    }

    process_info <- list()

    # NOTE(review): `pacakge_name` is kept exactly as in the original call;
    # presumably it mirrors the slot name declared by the `tidymass_parameter`
    # class -- confirm against the class definition before renaming.
    parameter <- new(
      Class = "tidymass_parameter",
      pacakge_name = "massdataset",
      function_name = "convert_mztab2mass_dataset()",
      parameter = list("no" = "no"),
      time = Sys.time()
    )

    process_info$create_mass_dataset <- parameter

    object <- new(
      Class = "mass_dataset",
      expression_data = expression_data,
      ms2_data = list(),
      annotation_table = data.frame(),
      sample_info = sample_info,
      variable_info = variable_info,
      sample_info_note = sample_info_note,
      variable_info_note = variable_info_note,
      process_info = process_info,
      other_files = data,
      version = as.character(utils::packageVersion(pkg = "massdataset"))
    )

    object
  }
#' Read mzTab Data File
#'
#' @author Xiaotao Shen <shenxt1990@outlook.com>
#' @description This function reads an mzTab data file and returns a list containing various tables such as Metadata (MTD), Small Molecule (SML), Small Molecule Feature (SMF), and Small Molecule Evidence (SME).
#'
#' @param file The name of the mzTab file to be read.
#' @param path The directory where the mzTab file is located. Default is the current directory.
#'
#' @return A list containing the following elements:
#' - `mtd_table`: Metadata table with columns `MTD`, `name` and `value`
#' - `sml_table`: Small Molecule table
#' - `smf_table`: Small Molecule Feature table
#' - `sme_table`: Small Molecule Evidence table
#'
#' A section that is absent from the file is returned as a data frame with
#' zero rows.
#'
#' @examples
#' \dontrun{
#' # Assuming 'mztab_file' is the name of the mzTab file
#' mztab_data <- read_mztab(file = mztab_file)
#' }
#'
#' @details
#' The file is read line by line and each line is split on tabs, so commas
#' and quotes inside a cell are kept verbatim. Lines are assigned to a section
#' by their prefix (`MTD`; `SMH`/`SML`; `SFH`/`SMF`; `SEH`/`SME`). For the SML,
#' SMF and SME sections the first matching line is used as the header. Rows
#' with fewer cells than the header are padded with empty strings and longer
#' rows are truncated. Columns whose name contains `abundance`, and
#' `theoretical_neutral_mass` in the SML table, are converted to numeric;
#' `null` cells become `NA`.
#'
#' @export
read_mztab <-
  function(file, path = ".") {
    ## Overall structure of an mzTab-M file.
    ## (A) Metadata about the experiment (MTD).
    ## (B) The small molecule (SML) table: final results per metabolite.
    ## (C) The small molecule feature (SMF) table: MS1 feature quantification.
    ## (D) The small molecule evidence (SME) table: identification evidence.

    # Read raw lines: mzTab is tab-separated, so a CSV parser must not be
    # used here (cells frequently contain commas).
    lines <- readLines(file.path(path, file),
                       warn = FALSE,
                       encoding = "UTF-8")
    # Drop a UTF-8 byte order mark so the first line keeps its prefix.
    if (length(lines) > 0 && startsWith(lines[1], "\ufeff")) {
      lines[1] <- substring(lines[1], 2)
    }
    # Padding tabs/spaces around a line carry no information.
    lines <- trimws(lines)

    # Character matrix of the tab-split lines matching `pattern`, every row
    # normalised to `n_col` cells (default: width of the first matching line).
    # Returns NULL when no line matches.
    split_section <- function(pattern, n_col = NULL) {
      fields <- strsplit(lines[grepl(pattern, lines)], "\t", fixed = TRUE)
      if (length(fields) == 0) {
        return(NULL)
      }
      if (is.null(n_col)) {
        n_col <- length(fields[[1]])
      }
      rows <- lapply(fields, function(x) {
        c(x, rep("", n_col))[seq_len(n_col)]
      })
      do.call(rbind, rows)
    }

    # Data frame for a section whose first matching line is its header.
    read_section <- function(pattern) {
      cells <- split_section(pattern)
      if (is.null(cells)) {
        return(data.frame())
      }
      section <- as.data.frame(cells, stringsAsFactors = FALSE)
      colnames(section) <- cells[1, ]
      section[-1, , drop = FALSE]
    }

    # Numeric conversion that maps the mzTab missing-value token to NA
    # without raising a coercion warning.
    to_numeric <- function(x) {
      x[x %in% "null"] <- NA
      as.numeric(x)
    }

    # Convert every column whose name contains "abundance" to numeric.
    convert_abundance <- function(section) {
      for (i in which(grepl("abundance", colnames(section), fixed = TRUE))) {
        section[[i]] <- to_numeric(section[[i]])
      }
      section
    }

    #### extract MTD table
    mtd_cells <- split_section("^MTD", n_col = 3)
    if (is.null(mtd_cells)) {
      mtd_table <- data.frame(
        MTD = character(0),
        name = character(0),
        value = character(0),
        stringsAsFactors = FALSE
      )
    } else {
      mtd_table <- as.data.frame(mtd_cells, stringsAsFactors = FALSE)
      colnames(mtd_table) <- c("MTD", "name", "value")
    }

    #### SML table
    sml_table <- read_section("^SMH|^SML")
    if ("theoretical_neutral_mass" %in% colnames(sml_table)) {
      sml_table$theoretical_neutral_mass <-
        to_numeric(sml_table$theoretical_neutral_mass)
    }
    sml_table <- convert_abundance(sml_table)

    #### SMF table
    smf_table <- convert_abundance(read_section("^SMF|^SFH"))

    #### SME table
    sme_table <- read_section("^SEH|^SME")

    list(
      mtd_table = mtd_table,
      sml_table = sml_table,
      smf_table = smf_table,
      sme_table = sme_table
    )
  }
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.