library(XML)
library(jsonlite)
# Absolute path to the mzML file examined by this script. The path is machine
# specific, so edit it before running the script elsewhere.
mzml_file <- "/home/rmflight/Music/big_data_notbackup/CESB_rawdata/replication_study/cancer_positive_mode/mzml_full_conversion/SBC110871exopos.mzML"
# Parse the file, keeping the document as internal nodes; it is queried with
# getNodeSet() further down.
xml_doc <- xmlTreeParse(mzml_file, useInternalNodes = TRUE)
# Collect the namespace definitions starting at the root element
# (recursive = TRUE), simplified to a named vector.
ns <- xmlNamespaceDefinitions(xmlRoot(xml_doc), recursive = TRUE, simplify = TRUE)
# Name the first namespace "d1" so the XPath expressions below can refer to it
# by that prefix.
# NOTE(review): presumably the first entry is the document's default
# (unprefixed) namespace -- confirm.
names(ns)[1] <- "d1"
#' Convert named character vectors in a nested list to data frames
#'
#' Walks `in_list` recursively. Every named character vector is turned into a
#' one-row data frame whose columns are the vector's names. Every other value
#' (unnamed character vectors, `NULL`, numbers, ...) is returned unchanged.
#'
#' @param in_list a (possibly nested) list, or a single leaf value
#'
#' @return an object with the same list structure as `in_list`, in which named
#'   character vectors have been replaced by one-row data frames
to_data_frame <- function(in_list){
  # inherits() always yields a single TRUE/FALSE, whereas
  # `class(x) == "list"` gives a multi-element condition for objects that
  # carry more than one class (e.g. a matrix).
  if (inherits(in_list, "list")) {
    return(lapply(in_list, to_data_frame))
  }

  if (inherits(in_list, "character") && !is.null(names(in_list))) {
    # column matrix -> transpose -> one row with one column per name
    return(as.data.frame(t(as.matrix(in_list))))
  }

  # Any other leaf needs no conversion. Returning it here (instead of falling
  # through with nothing assigned) keeps NULL / numeric entries from raising
  # "object 'out_list' not found".
  in_list
}

#' Merge `.attrs` entries into their parent lists
#'
#' For every list (at any depth) that has an element named `.attrs`, the
#' entries of `.attrs` are copied into that list under their own names and the
#' `.attrs` element is dropped. If any attribute name is already used by an
#' element of the list, that list's `.attrs` is left untouched so nothing gets
#' overwritten.
#'
#' @param in_list a (possibly nested) list; any non-list value is returned
#'   unchanged
#'
#' @return `in_list` with `.attrs` entries hoisted into their parent lists
remove_attrs <- function(in_list){
  # inherits() gives a single TRUE/FALSE even for multi-class objects, where
  # `class(x) == "list"` would hand `if` a multi-element condition.
  if (!inherits(in_list, "list")) {
    return(in_list)
  }

  out_list <- in_list
  list_names <- names(out_list)

  if (".attrs" %in% list_names) {
    tmp_attrs <- out_list[[".attrs"]]
    name_attrs <- names(tmp_attrs)

    # only hoist when no attribute would clobber an existing element
    if (!any(name_attrs %in% list_names)) {
      for (i_name in name_attrs) {
        out_list[[i_name]] <- tmp_attrs[[i_name]]
      }
      out_list[[".attrs"]] <- NULL
    }
  }

  # Recurse into every element exactly once. Previously lists without
  # `.attrs` were recursed into twice, which doubled the work at each level
  # of nesting without changing the result.
  lapply(out_list, remove_attrs)
}


#' get_scan_polarity
#'
#' Summarises the polarity of a list of spectra. For each spectrum, the values
#' of its elements named "cvParam" that contain the text "scan" are collected;
#' the distinct results across all spectra decide the answer.
#'
#' @param spectrum_list the list of spectra; an element named ".attrs", if
#'   present, is ignored
#'
#' @return "positive" or "negative" when every spectrum yields the same
#'   result and that result mentions the respective word, otherwise "mixed"
#'
get_scan_polarity <- function(spectrum_list){
  # the list-level attributes are not a spectrum
  spectrum_list[[".attrs"]] <- NULL

  # values of one spectrum's cvParam entries that mention "scan"
  scan_terms <- function(in_spectrum){
    is_cv_param <- names(in_spectrum) %in% "cvParam"
    grep("scan", unlist(in_spectrum[is_cv_param]), value = TRUE)
  }

  distinct_terms <- as.character(unique(lapply(spectrum_list, scan_terms)))

  # zero or several distinct results cannot be a single polarity
  if (length(distinct_terms) != 1) {
    return("mixed")
  }

  if (grepl("positive", distinct_terms)) {
    "positive"
  } else if (grepl("negative", distinct_terms)) {
    "negative"
  } else {
    "mixed"
  }
}
#xml_list <- xmlToList(xml_doc)
# Select the mzML element nested inside indexedmzML; "d1" is the prefix that
# was assigned to the first namespace in `ns`.
mz_data <- getNodeSet(xml_doc, "/d1:indexedmzML/d1:mzML", ns)

mzml_out <- list()
tmp_attr <- unclass(xmlAttrs(mz_data[[1]]))

# Read the namespace information *before* stripping it. attr() matches
# attribute names partially, so "namespace" also finds an attribute called
# "namespaces"; the earlier order removed that attribute first and only then
# tried to read it.
# NOTE(review): assumes xmlAttrs() attaches the namespace information as a
# "namespaces" attribute -- confirm against the XML package.
ns_attr <- attr(tmp_attr, "namespace")
attr(tmp_attr, "namespaces") <- NULL
# Assigning a zero-length value to a vector element is an error, so only
# record the namespace when one was found.
if (length(ns_attr) > 0) {
  tmp_attr["namespace"] <- ns_attr
}
mzml_out[["mzML"]][[".attrs"]] <- tmp_attr

# Children of the mzML element that are copied into the output.
other_nodes_2_get <- c("cvList", "fileDescription",
                       "referenceableParamGroupList",
                       "softwareList",
                       "instrumentConfigurationList",
                       "dataProcessingList")

# Every child is converted to a list; only the ones named above are kept in
# the output, but the converted "run" entry is still needed for the polarity
# check at the end.
other_nodes <- xmlChildren(mz_data[[1]])
other_list <- lapply(other_nodes, xmlToList)

trim_other_list <- other_list[other_nodes_2_get]

mzml_out <- c(mzml_out, trim_other_list)

# For "run" only the element's own attributes are stored in the output.
mzml_out[["run"]][[".attrs"]] <- xmlAttrs(mz_data[[1]][["run"]])
mzml_out2 <- remove_attrs(mzml_out)

mzml_out_frame <- to_data_frame(mzml_out2)

mzml_out_frame$run$scanPolarity <- get_scan_polarity(other_list$run$spectrumList)
#' Capture the call as a list, with `a` replaced by its value
#'
#' Turns the matched call into a list (function name first, then the
#' arguments that were actually supplied, unevaluated) and overwrites the `a`
#' entry with the evaluated value of `a`. When `a` evaluates to `NULL` the
#' list ends up without an `a` entry.
#'
#' @param a value to evaluate and store in the result
#' @param b not used in the body; shows up in the result only if supplied
#' @param c not used in the body; shows up in the result only if supplied
#'
#' @return a list built from the matched call
tmp_fun <- function(a = NULL, b = 10, c = NULL){
  captured_call <- as.list(match.call(expand.dots = TRUE))
  # storing NULL drops the entry, anything else replaces the expression
  captured_call[["a"]] <- a
  captured_call
}

# No arguments supplied: the captured call has no `a`, and the NULL default
# does not add one.
crap <- tmp_fun()
print(crap)

# `a` supplied as a literal value.
crap2 <- tmp_fun(a = 20)
print(crap2)

# `a` supplied through a variable: tmp_fun stores the evaluated value of
# `tmp_val` in the result rather than the symbol from the call.
tmp_val <- 10
crap3 <- tmp_fun(a = tmp_val)
crap3


# MoseleyBioinformaticsLab/FTMS.peakCharacterization documentation built on April 27, 2022, 3:32 a.m.