R/msp2FSdb.R

Defines functions msp2FSdb

Documented in msp2FSdb

## This function was designed not only to achieve the fastest computational speed; but also
## can standardize .msp files that were generated by inconsistent settings. (Sadjad)
##
msp2FSdb <- function(path, MSPfile_vector = "", massIntegrationWindow = 0, allowedNominalMass = FALSE,
                     allowedWeightedSpectralEntropy = TRUE, noiseRemovalRatio = 0.01, number_processing_threads = 1) {
  ##
  ##############################################################################
  ##
  SEnumPeaksFragmentListInf000 <- list(-Inf, 0, matrix(nrow = 0, ncol = 2))
  ##
  if ((massIntegrationWindow > 0) | allowedNominalMass) {
    fragmentationPeaksIntegrationCheck <- TRUE
  } else {
    fragmentationPeaksIntegrationCheck <- FALSE
  }
  ##
  if (noiseRemovalRatio <= 0) {
    noiseRemovalRatio <- 1e-16 ## To avoid zeros and negative values for the logarithmic calculations
  }
  ##
  L_msp <- length(MSPfile_vector)
  ##
  ##############################################################################
  ##
  replace1EqualSign <- function(strEqEqEq) {
    xEq <- FSA_locate_regex(strEqEqEq, "=")[1, 1]
    strColonEqEq <- paste0(substr(strEqEqEq, 1, xEq - 1), ": ", substr(strEqEqEq, xEq + 1, nchar(strEqEqEq)))
    return(strColonEqEq)
  }
  ##
  ##############################################################################
  ##
  MoNA_comments_deconvolution <- function(mspBlock, LmspBlock, loc_numPeaksRaw) {
    loc_comments <- grep("Comments: ", mspBlock, ignore.case = TRUE)
    if (length(loc_comments) > 0) {
      loc_comments <- loc_comments[1]
      mspBlock[loc_comments] <- gsub("Parent=", "PrecursorMZ=", mspBlock[loc_comments], ignore.case = TRUE)
      comments_str <- substr(mspBlock[loc_comments], 11, nchar(mspBlock[loc_comments])) # nchar("Comments: ") + 1 = 11
      comments_strsplit <- strsplit(comments_str, '" "')[[1]]
      xEqualSign <- grep("=", comments_strsplit)
      comments_strsplit1 <- comments_strsplit[xEqualSign]
      comments_strsplit1 <- do.call(c, lapply(comments_strsplit1, function(s) {replace1EqualSign(s)}))
      comments_strsplit1 <- gsub('"', '', comments_strsplit1)
      ##
      comments_strsplit2 <- paste0(comments_strsplit[setdiff(1:length(comments_strsplit), xEqualSign)], collapse = '" "')
      mspBlock[loc_comments] <- paste0('comments: ', comments_strsplit2)
      ##
      mspBlock <- c(mspBlock[1:(loc_comments - 1)], comments_strsplit1, mspBlock[loc_comments:LmspBlock])
      loc_numPeaksRaw <- loc_numPeaksRaw + length(xEqualSign)
    }
    ##
    commentsList <- list(mspBlock, loc_numPeaksRaw)
    ##
    return(commentsList)
  }
  ##
  ##############################################################################
  ##
  NIST_comment_deconvolution <- function(mspBlock, LmspBlock, loc_numPeaksRaw) {
    loc_comment <- grep("Comment: ", mspBlock, ignore.case = TRUE)
    if (length(loc_comment) > 0) {
      loc_comment <- loc_comment[1]
      mspBlock[loc_comment] <- gsub("Parent=", "PrecursorMZ=", mspBlock[loc_comment], ignore.case = TRUE)
      comment_str <- substr(mspBlock[loc_comment], 10, nchar(mspBlock[loc_comment])) # nchar("Comment: ") + 1 = 10
      comment_str <- gsub('"', '', comment_str)
      comment_strsplit <- strsplit(comment_str, " ")[[1]]
      xEqualSign <- grep("=", comment_strsplit)
      comment_strsplit1 <- comment_strsplit[xEqualSign]
      comment_strsplit1 <- do.call(c, lapply(comment_strsplit1, function(s) {replace1EqualSign(s)}))
      ##
      comment_strsplit2 <- paste0(comment_strsplit[setdiff(1:length(comment_strsplit), xEqualSign)], collapse = " ")
      mspBlock[loc_comment] <- paste0("comment: ", comment_strsplit2)
      ##
      mspBlock <- c(mspBlock[1:(loc_comment - 1)], comment_strsplit1, mspBlock[loc_comment:LmspBlock])
      loc_numPeaksRaw <- loc_numPeaksRaw + length(xEqualSign)
    }
    ##
    commentList <- list(mspBlock, loc_numPeaksRaw)
    ##
    return(commentList)
  }
  ##
  ##############################################################################
  ##
  ListMSPfile_call <- function(i) {
    ##
    mspFileLocation <- paste0(path, "/", MSPfile_vector[i])
    mspFileLocation <- gsub("\\", "/", mspFileLocation, fixed = TRUE)
    strmspFileLocation <- strsplit(mspFileLocation, "/")[[1]]
    mspFileName <- strmspFileLocation[length(strmspFileLocation)]
    mspFileLocation <- paste0(strmspFileLocation, collapse = "/")
    ##
    readLinesMSP <- tryCatch(readLines(mspFileLocation, warn = FALSE),
                             error = function(e){FSA_logRecorder(paste0("Problem with loading .msp file --> `", mspFileName, "`!"))})
    readLinesMSP <- c("", readLinesMSP, "")
    ############################################################################
    loc_start <- grep("[a-zA-Z]", readLinesMSP, ignore.case = TRUE)[1]
    loc_NumPeaks <- grep("Num Peaks: ", readLinesMSP, ignore.case = TRUE)
    LreadLinesMSP <- length(loc_NumPeaks)
    ## MoNA correction
    commentListReadLinesMSP <- MoNA_comments_deconvolution(readLinesMSP[loc_start:(loc_NumPeaks[1] - 1)], (loc_NumPeaks[1] - loc_start), (loc_NumPeaks[1] - loc_start))
    ## NIST correction
    commentListReadLinesMSP <- NIST_comment_deconvolution(commentListReadLinesMSP[[1]], commentListReadLinesMSP[[2]], commentListReadLinesMSP[[2]])
    headersReadLinesMSP <- commentListReadLinesMSP[[1]]
    ##
    headers <- do.call(c, lapply(headersReadLinesMSP, function(j) {
      x_colon <- FSA_locate_regex(j, ":")
      if (!is.null(x_colon)) {
        substr(j, 1, (x_colon[1, 1] - 1))
      }
    }))
    ##
    list(readLinesMSP, rep(mspFileName, LreadLinesMSP), headers)
  }
  ##
  ##############################################################################
  ##
  SEnumPeaksFragmentList_call <- function(mspBlock, loc_numPeaksRaw, numPeaksRaw,
                                          fragmentationPeaksIntegrationCheck, massIntegrationWindow, allowedNominalMass,
                                          noiseRemovalRatio, allowedWeightedSpectralEntropyRaw, SEnumPeaksFragmentListInf000) {
    ##
    seperatingString <- if (grepl("\t", mspBlock[loc_numPeaksRaw + 1])) {"\t"} else {" "}
    ##
    FragmentList <- do.call(rbind, lapply((loc_numPeaksRaw + 1):(loc_numPeaksRaw + numPeaksRaw), function(j) {
      strFragmentList <- strsplit(mspBlock[j], seperatingString)[[1]]
      c(strFragmentList[1], strFragmentList[2])
    }))
    FragmentList <- matrix(as.numeric(FragmentList), ncol = 2)
    ##
    if (fragmentationPeaksIntegrationCheck) {
      FragmentList <- spectra_integrator(FragmentList, massIntegrationWindow, allowedNominalMass)
    }
    ##
    numberFragments <- dim(FragmentList)[1]
    ##
    if (numberFragments > 0) {
      ##
      SEfragList <- spectral_entropy_calculator(FragmentList, allowedWeightedSpectralEntropyRaw, noiseRemovalRatio)
      ##
      numPeaks <- SEfragList[[2]]
      if (numPeaks > 0) {
        SEfragmentList <- SEfragList[[3]]
        if (numPeaks > 1) {
          SEfragmentList <- SEfragmentList[order(SEfragmentList[, 2], decreasing = TRUE), ]
        }
        ##
        SEnumPeaksFragmentList <- list(SEfragList[[1]], numPeaks, SEfragmentList)
      } else {
        SEnumPeaksFragmentList <- SEnumPeaksFragmentListInf000
      }
    } else {
      ##
      SEnumPeaksFragmentList <- SEnumPeaksFragmentListInf000
    }
    return(SEnumPeaksFragmentList)
  }
  ##
  ##############################################################################
  ##
  mspList_call <- function(i) {
    ##
    mspBlock <- MSPfile[x1msp[i]:x2msp[i]]
    ##
    loc_numPeaksRaw <- grep("Num Peaks:", mspBlock, ignore.case = TRUE)
    if (length(loc_numPeaksRaw) > 0) {
      LmspBlock <- x2msp[i] - x1msp[i] + 1
      numPeaksRaw <- LmspBlock - loc_numPeaksRaw
      ##########################################################################
      commentList <- NIST_comment_deconvolution(mspBlock, LmspBlock, loc_numPeaksRaw)
      mspBlock <- commentList[[1]]
      loc_numPeaksRaw <- commentList[[2]]
      ##########################################################################
      commentsList <- MoNA_comments_deconvolution(mspBlock, LmspBlock, loc_numPeaksRaw)
      mspBlock <- commentsList[[1]]
      loc_numPeaksRaw <- commentsList[[2]]
      ##########################################################################
      loc_precursormz <- grep("PrecursorMZ:", mspBlock, ignore.case = TRUE)
      if (length(loc_precursormz) > 0) {
        strPrecursorMZ <- substr(mspBlock[loc_precursormz[1]], 13, nchar(mspBlock[loc_precursormz[1]])) # nchar("PrecursorMZ:") + 1 = 13
        PrecursorMZ <- suppressWarnings(as.numeric(strPrecursorMZ))
        ##
        if (is.na(PrecursorMZ)) {
          if (strPrecursorMZ != "") {
            strSplitPrecursorMZ <- strsplit(strPrecursorMZ, ",|;| |-")[[1]] ## The `space` character should be here
            PrecursorMZ <- suppressWarnings(as.numeric(strSplitPrecursorMZ))
            LPrecursorMZ <- length(PrecursorMZ)
            if (LPrecursorMZ > 1) {
              xNonNAprecursorMZ <- which(!is.na(PrecursorMZ))
              LxNonNAprecursorMZ <- length(xNonNAprecursorMZ)
              if (LPrecursorMZ > LxNonNAprecursorMZ) {
                PrecursorMZ <- PrecursorMZ[xNonNAprecursorMZ]
              } else if (LxNonNAprecursorMZ == 0) {
                PrecursorMZ <- NA
              }
            }
          }
        }
      } else {
        PrecursorMZ <- NA
      }
      ##########################################################################
      if (numPeaksRaw > 0) {
        ##
        ########################################################################
        ## To avoid repeating weight transformation
        loc_weightedSpectralEntropy <- grep("Weighted_Spectral_Entropy_Transformation:", mspBlock, ignore.case = TRUE)
        ##
        if (length(loc_weightedSpectralEntropy) > 0) {
          strWeightedSpectralEntropyRaw <- substr(mspBlock[loc_weightedSpectralEntropy[1]], 43, nchar(mspBlock[loc_weightedSpectralEntropy[1]])) # nchar("Weighted_Spectral_Entropy_Transformation: ") + 1 = 43
          weightedSpectralEntropyRaw <- tryCatch(eval(parse(text = strWeightedSpectralEntropyRaw)), error = function(e){FALSE})
          ##
          if (weightedSpectralEntropyRaw) {
            allowedWeightedSpectralEntropyRaw <- FALSE
          } else {
            allowedWeightedSpectralEntropyRaw <- allowedWeightedSpectralEntropy
          }
          ##
        } else {
          allowedWeightedSpectralEntropyRaw <- allowedWeightedSpectralEntropy
        }
        ##
        ########################################################################
        ##
        SEnumPeaksFragmentList <- tryCatch(SEnumPeaksFragmentList_call(mspBlock, loc_numPeaksRaw, numPeaksRaw,
                                                                       fragmentationPeaksIntegrationCheck, massIntegrationWindow, allowedNominalMass,
                                                                       noiseRemovalRatio, allowedWeightedSpectralEntropyRaw, SEnumPeaksFragmentListInf000),
                                           warning = function(w) {SEnumPeaksFragmentListInf000},
                                           error = function(e) {SEnumPeaksFragmentListInf000})
      } else {
        SEnumPeaksFragmentList <- SEnumPeaksFragmentListInf000
      }
      ##
      spectralEntropy <- SEnumPeaksFragmentList[[1]]
      numPeaks <- SEnumPeaksFragmentList[[2]]
      mspBlockFragmentList <- SEnumPeaksFragmentList[[3]]
      ##########################################################################
      AncillaryParametersVec <- rep("", Lheaders)
      ##
      mspBlockNchar <- do.call(c, lapply(1:(loc_numPeaksRaw - 1), function(j) {
        nchar(mspBlock[j])
      }))
      ##
      ordermspBlockNchar <- order(mspBlockNchar, decreasing = FALSE)
      ##
      for (j in ordermspBlockNchar) {
        x_colon <- FSA_locate_regex(mspBlock[j], ":")
        ##
        if (!is.null(x_colon)) {
          x_colon <- x_colon[1, 1]
          jheader <- tolower(substr(mspBlock[j], 1, (x_colon - 1)))
          ##
          x_j <- which(headers == jheader)
          if (length(x_j) == 1) {
            AncillaryParametersVec[x_j] <- substr(mspBlock[j], (x_colon + 2), mspBlockNchar[j])
          }
        }
      }
      ##########################################################################
      processedMSPblock <- lapply(PrecursorMZ, function(j) {
        list(PMZ_list = j, NP_list = numPeaks, SE_list = spectralEntropy, FL_list = mspBlockFragmentList, AnP_list = AncillaryParametersVec)
      })
      ##
    } else {
      ##########################################################################
      processedMSPblock <- NULL
      ##
    }
    return(processedMSPblock)
  }
  ##
  ##############################################################################
  ##
  if (number_processing_threads == 1) {
    ##
    ListMSPfile <- lapply(1:L_msp, function(i) {
      ListMSPfile_call(i)
    })
    ##
  } else {
    ##
    ############################################################################
    ##
    osType <- Sys.info()[['sysname']]
    ##
    if (osType == "Windows") {
      ##
      ##########################################################################
      ####
      clust <- makeCluster(number_processing_threads)
      clusterExport(clust, setdiff(ls(), c("clust", "L_msp")), envir = environment())
      ##
      ListMSPfile <- parLapply(clust, 1:L_msp, function(i) {
        ListMSPfile_call(i)
      })
      ##
      stopCluster(clust)
      ##
      ##########################################################################
      ##
    } else {
      ##
      ListMSPfile <- mclapply(1:L_msp, function(i) {
        ListMSPfile_call(i)
      }, mc.cores = number_processing_threads)
      ##
      closeAllConnections()
      ##
      ##########################################################################
      ##
    }
  }
  ##
  ##############################################################################
  ##############################################################################
  ##
  MSPfile <- do.call(c, lapply(1:L_msp, function(i) {
    ListMSPfile[[i]][[1]]
  }))
  ##
  mspLibName <- do.call(c, lapply(1:L_msp, function(i) {
    ListMSPfile[[i]][[2]]
  }))
  ##
  headers <- do.call(c, lapply(1:L_msp, function(i) {
    ListMSPfile[[i]][[3]]
  }))
  ##
  ListMSPfile <- NULL
  ########################## Ancillary parameters ##############################
  headers <- tolower(headers)
  headers <- unique(headers)
  headers <- sort(headers)
  Lheaders <- length(headers)
  ##############################################################################
  x_msp <- which((MSPfile == "") | (MSPfile == " ") | (MSPfile == "  ") | (MSPfile == "\t"))
  xdiff <- which(diff(x_msp) > 1)
  x1msp <- x_msp[xdiff] + 1
  x2msp <- x_msp[xdiff + 1] - 1
  ##
  ##############################################################################
  ##############################################################################
  ##
  if (number_processing_threads == 1) {
    ##
    mspList <- lapply(1:length(xdiff), function(i) {
      mspList_call(i)
    })
    ##
    MSPfile <- NULL
    ##
    lengthMSPbolcks <- do.call(c, lapply(mspList, function(i) {
      length(i)
    }))
    ##
  } else {
    ##
    ############################################################################
    ##
    if (osType == "Windows") {
      ##
      ##########################################################################
      ####
      clust <- makeCluster(number_processing_threads)
      clusterExport(clust, setdiff(ls(), c("clust")), envir = environment())
      ##
      mspList <- parLapply(clust, 1:length(xdiff), function(i) {
        mspList_call(i)
      })
      stopCluster(clust)
      ####
      MSPfile <- NULL
      ##
      clust <- makeCluster(number_processing_threads)
      clusterExport(clust, NULL, envir = environment())
      ##
      lengthMSPbolcks <- do.call(c, parLapply(clust, mspList, function(i) {
        length(i)
      }))
      ##
      stopCluster(clust)
      ##
      ##########################################################################
      ##
    } else {
      ##
      mspList <- mclapply(1:length(xdiff), function(i) {
        mspList_call(i)
      }, mc.cores = number_processing_threads)
      ##
      MSPfile <- NULL
      ##
      lengthMSPbolcks <- do.call(c, mclapply(mspList, function(i) {
        length(i)
      }, mc.cores = number_processing_threads))
      ##
      closeAllConnections()
      ##
      ##########################################################################
      ##
    }
  }
  ##
  ##############################################################################
  ##############################################################################
  ##
  xNonNull <- which(lengthMSPbolcks > 0)
  ##
  NumPeaks_PrecursorMZ_SpectralEntropy <- do.call(rbind, lapply(xNonNull, function(i) {
    mspL <- mspList[[i]]
    do.call(rbind, lapply(1:lengthMSPbolcks[i], function(j) {
      c(mspL[[j]]$NP_list, mspL[[j]]$PMZ_list, mspL[[j]]$SE_list)
    }))
  }))
  ##
  NumPeaks <- NumPeaks_PrecursorMZ_SpectralEntropy[, 1]
  PrecursorMZ <- NumPeaks_PrecursorMZ_SpectralEntropy[, 2]
  spectralEntropyVector <- NumPeaks_PrecursorMZ_SpectralEntropy[, 3]
  NumPeaks_PrecursorMZ_SpectralEntropy <- NULL
  ##
  FragmentList <- unlist(lapply(xNonNull, function(i) {
    mspL <- mspList[[i]]
    lapply(1:lengthMSPbolcks[i], function(j) {
      mspL[[j]]$FL_list
    })
  }), recursive = FALSE)
  ##
  mspAncillaryParameters <- do.call(rbind, lapply(xNonNull, function(i) {
    mspL <- mspList[[i]]
    do.call(rbind, lapply(1:lengthMSPbolcks[i], function(j) {
      mspL[[j]]$AnP_list
    }))
  }))
  ##
  mspList <- NULL
  ##############################################################################
  ########################## library standardization ###########################
  ##############################################################################
  ## meta-variables in the `headerFamily` array all should be in lowercase!!!
  ## headersFamily <- c("mother of meta-variables", "children of the meta-variables")
  headersFamily <- list(c("retention_time", "retentiontime", "precursorrt", "precursor_rt"),
                        c("precursor_intensity", "precursorintensity", "precursorint", "precursor_int"),
                        c("precursor_type", "precursortype"),
                        c("ms_level", "mslevel", "spectrumtype", "spectrum_type"),
                        c("accession", "db#"),
                        c("collision_energy", "collisionenergy", "ce", "energy"),
                        c("instrument_type", "instrumenttype"),
                        c("ion_mode", "ionmode"),
                        c("exact_mass", "exactmass"),
                        c("smiles", "computed_smiles"))
  ##
  ##############################################################################
  ##
  LnumPeaks <- length(NumPeaks)
  ##
  headersMerger <- function(mspAncillaryParameters, keepColumn, removeColumn, LnumPeaks) {
    ##
    for (i in removeColumn) {
      ##
      xK <- which(mspAncillaryParameters[, keepColumn] == "")
      ##
      xR <- which(mspAncillaryParameters[, i] != "")
      ##
      xR1 <- xR[xR %in% xK]
      if (length(xR1) > 0) {
        mspAncillaryParameters[xR1, keepColumn] <- mspAncillaryParameters[xR1, i]
      }
      ##
      xR2 <- setdiff(xR, xR1)
      if (length(xR2) > 0) {
        xR3 <- do.call(c, lapply(xR2, function(j) {
          if (nchar(mspAncillaryParameters[j, keepColumn]) < nchar(mspAncillaryParameters[j, i])) {
            j
          }
        }))
        ##
        if (length(xR3) > 0) {
          mspAncillaryParameters[xR3, keepColumn] <- mspAncillaryParameters[xR3, i]
        }
      }
    }
    ##
    mspAncillaryParameters <- matrix(mspAncillaryParameters[, -removeColumn], nrow = LnumPeaks)
    ##
    return(mspAncillaryParameters)
  }
  ##
  ##############################################################################
  ## To remove duplicate column headers
  headers <- gsub(" ", "_", headers)
  tableHeaders <- table(headers)
  x_table <- which(tableHeaders > 1)
  if (length(x_table) > 0) {
    for (i in names(tableHeaders[x_table])) {
      xTableHeader <- which(headers == i)
      LxTableHeader <- length(xTableHeader)
      ##
      keepColumn <- xTableHeader[1]
      removeColumn <- xTableHeader[2:LxTableHeader]
      mspAncillaryParameters <- headersMerger(mspAncillaryParameters, keepColumn, removeColumn, LnumPeaks)
      ##
      headers <- headers[-xTableHeader[2:LxTableHeader]]
    }
  }
  ##
  ##############################################################################
  ##
  for (i in headersFamily) {
    subHeaders <- i[2:length(i)]
    subHeaders <- headers[headers %in% subHeaders]
    LsubHeaders <- length(subHeaders)
    if (LsubHeaders > 0) {
      mainHeader <- i[1]
      xMainHeader <- which(headers == mainHeader)
      ##
      if (length(xMainHeader) == 0) {
        xMainHeader <- which(headers == subHeaders[1])
        headers[xMainHeader] <- mainHeader
        ##
        if (LsubHeaders > 1) { 
          subHeaders <- subHeaders[2:LsubHeaders]
        }
        LsubHeaders <- LsubHeaders - 1
      }
      ##
      if (LsubHeaders > 0) {
        keepColumn <- xMainHeader
        removeColumn <- do.call(c, lapply(subHeaders, function(j) {
          which(headers == j)
        }))
        mspAncillaryParameters <- headersMerger(mspAncillaryParameters, keepColumn, removeColumn, LnumPeaks)
        ##
        headers <- headers[-removeColumn]
      }
    }
  }
  ##
  ##############################################################################
  ## To correct meta-variables with space characters
  ## meta-variables in the `metaVar0Space` array all should be in lowercase!!!
  metaVar0Space <- c("inchikey", "inchi", "smiles", "formula", "isotope", "ion_mode")
  ##
  for (i in metaVar0Space) {
    x_metaVar <- which(headers == i)
    if (length(x_metaVar) > 0) {
      mspAncillaryParameters[, x_metaVar] <- gsub(" ", "", mspAncillaryParameters[, x_metaVar])
    }
  }
  ##
  ##############################################################################
  ## To remove meta-variables without values
  xNULL <- do.call(c, lapply(1:length(headers), function(i) {
    x0 <- which(mspAncillaryParameters[, i] == "")
    if (length(x0) == LnumPeaks) {
      i
    }
  }))
  #
  if (length(xNULL) > 0) {
    headers <- headers[-xNULL]
    mspAncillaryParameters <- matrix(mspAncillaryParameters[, -xNULL], ncol = length(headers))
  }
  ##
  ##############################################################################
  ##
  xl_2 <- which(lengthMSPbolcks > 1)
  if (length(xl_2) > 0) {
    j <- 0
    for (i in xl_2) {
      l1 <- lengthMSPbolcks[i] - 1
      j <- j + i
      mspLibName <- append(mspLibName, rep(mspLibName[j], l1), after = j)
      j <- j - i + l1
    }
  }
  ##
  mspAncillaryParameters <- data.frame(cbind(mspLibName, mspAncillaryParameters))
  headers <- c("MSPfilename", headers)
  colnames(mspAncillaryParameters) <- headers
  rownames(mspAncillaryParameters) <- NULL
  ##############################################################################
  ############################ Precursor Type ##################################
  ##############################################################################  
  xPrecursorType <- which(headers == "precursor_type")
  if (length(xPrecursorType) > 0) {
    ##
    uncorrectedPrecursorType <- mspAncillaryParameters[, xPrecursorType]
    ##
    xIonMode <- which(headers == "ion_mode")
    if (length(xIonMode) > 0) {
      ionMode <- mspAncillaryParameters[, xIonMode]
    } else {
      ionMode <- NULL
    }
    ##
    correctedPrecursorType <- UFSA_precursorType_corrector(uncorrectedPrecursorType, ionMode)
  } else {
    correctedPrecursorType <- rep("", LnumPeaks)
  }
  ##############################################################################
  ############################ Retention Time ##################################
  ##############################################################################
  if (length(which(headers == "retention_time")) > 0) {
    retentionTimeVector <- mspAncillaryParameters$`retention_time`
  } else {
    retentionTimeVector <- rep("", LnumPeaks)
  }
  ##
  if (length(which(headers == "rtinseconds")) > 0) {
    rtinseconds <- gsub("[a-zA-Z]", "", mspAncillaryParameters$`rtinseconds`, ignore.case = TRUE)
    rtinseconds <- suppressWarnings(as.numeric(rtinseconds))/60
    ##
    x60 <- which((is.numeric(rtinseconds)) & (!is.nan(rtinseconds)) & (!is.infinite(rtinseconds)) & (rtinseconds != 0))
    retentionTimeVector[x60] <- rtinseconds[x60]
    ##
    mspAncillaryParameters$`rtinseconds` <- NULL
    mspAncillaryParameters$`retention_time` <- retentionTimeVector
  }
  ##
  x_min <- grep("m", retentionTimeVector, ignore.case = TRUE)
  if (length(x_min) > 0) {
    retentionTimeVector[x_min] <- gsub("[a-zA-Z]", "", retentionTimeVector[x_min], ignore.case = TRUE)
  }
  ##
  x_second <- grep("s", retentionTimeVector, ignore.case = TRUE)
  if (length(x_second) > 0) {
    retentionTimeVector[x_second] <- suppressWarnings(as.numeric(gsub("[a-zA-Z]", "", retentionTimeVector[x_second], ignore.case = TRUE)))/60
  }
  ##
  retentionTimeVector <- suppressWarnings(as.numeric(retentionTimeVector))
  retentionTimeVector[!is.numeric(retentionTimeVector) | is.na(retentionTimeVector) | is.nan(retentionTimeVector) | (retentionTimeVector == 0)] <- Inf
  ##
  retentionTimeVector <- round(retentionTimeVector, 4)
  ##############################################################################
  ##############################################################################
  ##############################################################################
  if (allowedNominalMass) {
    PrecursorMZ <- round(PrecursorMZ, 0)
    massIntegrationWindow <- 0
  }
  logFSdb <- data.frame(massIntegrationWindow, allowedNominalMass, allowedWeightedSpectralEntropy, noiseRemovalRatio*100)
  names(logFSdb) <- c("massWindowIntegration", "allowedNominalMass", "allowedWeightedSpectralEntropy", "noiseRemovalPercentage")
  ##############################################################################
  PrecursorMZ[!is.numeric(PrecursorMZ) | is.na(PrecursorMZ) | is.nan(PrecursorMZ) | (PrecursorMZ == 0)] <- Inf
  if (!allowedNominalMass) {
    PrecursorMZ <- round(PrecursorMZ, 6)
  }
  ##
  names(NumPeaks) <- NULL
  names(PrecursorMZ) <- NULL
  names(correctedPrecursorType) <- NULL
  names(retentionTimeVector) <- NULL
  names(spectralEntropyVector) <- NULL
  ##
  spectralEntropyVector <- round(spectralEntropyVector, 5)
  ##############################################################################
  FSdb <- list(logFSdb, PrecursorMZ, correctedPrecursorType, retentionTimeVector, NumPeaks, spectralEntropyVector, FragmentList, mspAncillaryParameters)
  names(FSdb) <- c("logFSdb", "PrecursorMZ", "Precursor Type", "Retention Time", "Num Peaks", "Spectral Entropy", "FragmentList", "MSPLibraryParameters")
  ##
  return(FSdb)
}

Try the IDSL.FSA package in your browser

Any scripts or data that you put into this service are public.

IDSL.FSA documentation built on July 9, 2023, 6:45 p.m.