#' Convert mass_dataset to mzTab Format
#'
#' @author Xiaotao Shen <shenxt1990@outlook.com>
#' @description This function converts a `mass_dataset` object to mzTab format. The function creates
#' mzTab-compatible tables including metadata (MTD), small molecule (SML), small molecule feature (SMF),
#' and small molecule evidence (SME) tables.
#'
#' @param object A `mass_dataset` object containing the `expression_data`, `sample_info`, and `other_files`.
#' @param path A string specifying the directory where the mzTab file will be saved. Default is the current directory.
#'
#' @return Writes an mzTab file to the specified directory.
#'
#' @examples
#' \dontrun{
#' # Assuming 'dataset' is a mass_dataset object
#' convert_mass_dataset2mztab(dataset, path = "./mzTabFiles/")
#' }
#'
#' @details
#' The function first checks if the `mass_dataset` object contains the necessary mzTab tables (MTD, SML, SMF, SME).
#' If not, it generates these tables based on the `expression_data` and `sample_info` in the `mass_dataset` object.
#' The function then writes these tables to an mzTab file in the specified directory.
#'
#' @export
convert_mass_dataset2mztab <-
function(object,
path = ".") {
dir.create(path, showWarnings = FALSE, recursive = TRUE)
other_files <-
tryCatch(
slot(object = object, name = "other_files"),
error = function(e)
list()
)
if (length(other_files) > 0) {
if (all(c("mtd_table", "sml_table", "smf_table", "sme_table") %in% names(other_files))) {
other_files <-
other_files[c("mtd_table", "sml_table", "smf_table", "sme_table")]
}
}
if (length(other_files) == 0 |
any(!c("mtd_table", "sml_table", "smf_table", "sme_table") %in% names(other_files))) {
object <-
object %>%
activate_mass_dataset(what = "sample_info") %>%
filter(class == "Subject")
sample_info <-
object@sample_info
sample_info$ms_run <-
paste0("ms_run",
"[",
seq_len(nrow(sample_info)),
"]")
sample_info$assay <-
paste0("assay",
"[",
seq_len(nrow(sample_info)),
"]")
sample_info$study_variable <-
paste0("study_variable",
"[",
length(unique(sample_info$class)),
"]")
ms_run_info <-
lapply(sample_info$ms_run,
function(x) {
paste0(
x,
"-",
c(
"location",
"hash_method",
"format",
"fragmentation_method[1]",
"scan_polarity[1]",
"id_format"
)
)
}) %>%
unlist() %>%
data.frame(MTD = "MTD",
name = .,
value = "null")
assay_info <-
data.frame(
MTD = "MTD",
name = paste0(sample_info$assay, "-ms_run_ref"),
value = sample_info$ms_run
)
study_variable_info <-
lapply(unique(sample_info$study_variable),
function(x) {
paste0(
x,
c(
"",
"-description",
"-average_function",
"-variation_function",
"-assay_refs"
)
)
}) %>%
unlist() %>%
data.frame(MTD = "MTD",
name = .,
value = "null")
study_variable_info <-
study_variable_info %>%
dplyr::left_join(sample_info[,c("class", "study_variable")] %>%
dplyr::distinct(study_variable, .keep_all = TRUE),
by = c("name" = "study_variable"))
study_variable_info[is.na(study_variable_info)] <- "null"
study_variable_info <-
study_variable_info %>%
dplyr::mutate(value = dplyr::case_when(
class != "null" ~ class,
TRUE ~ value
)) %>%
dplyr::select(-class)
study_variable_info$value <-
seq_along(study_variable_info$name) %>%
purrr::map(function(i){
x <-
study_variable_info$name[i]
if(stringr::str_detect(x, "assay_refs")){
sample_info %>%
dplyr::filter(study_variable == stringr::str_replace(x, "-assay_refs", "")) %>%
pull(assay) %>%
paste(collapse = "|")
}else{
study_variable_info$value[i]
}
}) %>%
unlist()
###mtd table
mtd_table <-
rbind(mtd_table_info,
ms_run_info,
assay_info,
study_variable_info)
###
variable_info <-
extract_variable_info(object)
expression_data <-
extract_expression_data(object)
###sml table
sml_table <-
data.frame(
SMH = "SML",
SML_ID = variable_info$variable_id,
SMF_ID_REFS = variable_info$variable_id,
database_identifier = "null",
chemical_formula = "null",
smiles = "null",
inchi = "null",
chemical_name = "null",
uri = "null",
theoretical_neutral_mass = "null",
adduct_ions = "null",
reliability = "null",
best_id_confidence_measure = "null",
best_id_confidence_value = "null"
)
colnames(expression_data) <-
paste0("abundance_", sample_info$assay)
sml_table <-
cbind(sml_table, expression_data)
####smf table
smf_table <-
data.frame(
SFH = "SMF",
SMF_ID = variable_info$variable_id,
SME_ID_REFS = variable_info$variable_id,
SME_ID_REF_ambiguity_code = "null",
adduct_ion = "null",
isotopomer = "null",
exp_mass_to_charge = variable_info$mz,
charge = "null",
retention_time_in_seconds = variable_info$rt,
retention_time_in_seconds_start = NA,
retention_time_in_seconds_end = NA
)
smf_table <-
cbind(smf_table, expression_data)
###SME table
sme_table <-
data.frame(
SEH = "SME",
SME_ID = variable_info$variable_id,
evidence_input_id = "null",
database_identifier = "null",
chemical_formula = "null",
smiles = "null",
inchi = "null",
chemical_name = "null",
uri = "null",
derivatized_form = "null",
adduct_ion = "null",
exp_mass_to_charge = variable_info$mz,
charge = "null",
theoretical_mass_to_charge = variable_info$mz,
spectra_ref = "null",
identification_method = "null",
ms_level = "null",
"id_confidence_measure[1]" = "null",
rank = "null",
check.names = FALSE
)
other_files <-
list(
mtd_table = mtd_table,
sml_table = sml_table,
smf_table = smf_table,
sme_table = sme_table
)
}
max_ncol <- c(
ncol(other_files$mtd_table),
ncol(other_files$sml_table),
ncol(other_files$smf_table),
ncol(other_files$sme_table)
) %>%
max()
other_files <-
other_files %>%
purrr::map(function(x) {
if (ncol(x) < max_ncol) {
new_info <-
matrix("", nrow = nrow(x), ncol = max_ncol - ncol(x)) %>%
as.data.frame()
x <-
cbind(x, new_info)
x
} else{
x
}
})
colnames(other_files$mtd_table) <- NULL
other_files$sml_table <-
rbind(rep("", ncol(other_files$sml_table)),
rbind(colnames(other_files$sml_table),
other_files$sml_table))
colnames(other_files$sml_table) <- NULL
other_files$smf_table <-
rbind(rep("", ncol(other_files$smf_table)),
rbind(colnames(other_files$smf_table),
other_files$smf_table))
colnames(other_files$smf_table) <- NULL
other_files$sme_table <-
rbind(rep("", ncol(other_files$sme_table)),
rbind(colnames(other_files$sme_table),
other_files$sme_table))
colnames(other_files$sme_table) <- NULL
colnames(other_files[[1]]) <-
colnames(other_files[[2]]) <-
colnames(other_files[[3]]) <-
colnames(other_files[[4]]) <-
seq_len(max_ncol)
other_files <-
other_files %>%
dplyr::bind_rows() %>%
as.data.frame()
colnames(other_files) <- NULL
rownames(other_files) <- NULL
which(other_files == "V1", arr.ind = TRUE)
idx1 <-
which(other_files[, 1, drop = TRUE] == "SMH")
idx2 <-
which(stringr::str_detect(as.character(other_files[idx1, ]), "V[0-9]{1,2}"))
if (length(idx2) > 0) {
other_files[idx1, idx2] <- ""
}
idx1 <-
which(other_files[, 1, drop = TRUE] == "SFH")
idx2 <-
which(stringr::str_detect(as.character(other_files[idx1, ]), "V[0-9]{1,2}"))
if (length(idx2) > 0) {
other_files[idx1, idx2] <- ""
}
write.table(
x = other_files,
file = file.path(path, "data.mzTab.txt"),
quote = FALSE,
sep = "\t",
row.names = FALSE
)
file.rename(from = file.path(path, "data.mzTab.txt"),
to = file.path(path, "data.mzTab"))
}
mtd_table_name <-
c(
"assay[1]-ms_run_ref",
"assay[2]-ms_run_ref",
"assay[3]-ms_run_ref",
"assay[4]-ms_run_ref",
"assay[5]-ms_run_ref",
"assay[6]-ms_run_ref",
"assay[7]-ms_run_ref",
"assay[8]-ms_run_ref",
"assay[9]-ms_run_ref",
"assay[10]-ms_run_ref",
"assay[11]-ms_run_ref",
"assay[12]-ms_run_ref",
"assay[13]-ms_run_ref",
"assay[14]-ms_run_ref",
"assay[15]-ms_run_ref",
"study_variable[1]",
"study_variable[1]-description",
"study_variable[1]-average_function",
"study_variable[1]-variation_function",
"study_variable[1]-assay_refs",
"study_variable[2]",
"study_variable[2]-description",
"study_variable[2]-average_function",
"study_variable[2]-variation_function",
"study_variable[2]-assay_refs",
"study_variable[3]",
"study_variable[3]-description",
"study_variable[3]-average_function",
"study_variable[3]-variation_function",
"study_variable[3]-assay_refs",
"cv[1]-label",
"cv[1]-uri",
"cv[1]-version",
"cv[1]-full_name",
"cv[2]-label",
"cv[2]-uri",
"cv[2]-version",
"cv[2]-full_name",
"cv[3]-label",
"cv[3]-uri",
"cv[3]-version",
"cv[3]-full_name",
"cv[4]-label",
"cv[4]-uri",
"cv[4]-version",
"cv[4]-full_name",
"cv[5]-label",
"cv[5]-uri",
"cv[5]-version",
"cv[5]-full_name",
"cv[6]-label",
"cv[6]-uri",
"cv[6]-version",
"cv[6]-full_name",
"small_molecule-quantification_unit",
"small_molecule_feature-quantification_unit",
"small_molecule-identification_reliability",
"database[1]",
"database[1]-prefix",
"database[1]-uri",
"database[1]-version"
)
mtd_table_info <-
data.frame(
MTD = "MTD",
name = c(
"mzTab-version",
"mzTab-ID",
"contact[1]-name",
"contact[1]-email",
"contact[1]-affiliation",
"contact[2]-name",
"contact[2]-email",
"contact[2]-affiliation",
"publication[1]",
"instrument[1]-name",
"instrument[1]-source",
"instrument[1]-analyzer[1]",
"instrument[1]-detector",
"quantification_method",
"sample[1]-species[1]",
"sample[1]-cell_type[1]",
"sample[1]-tissue[1]",
"sample_processing[1]",
"software[1]",
"software[1]-setting[1]"
),
value = "null"
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.