R/dbCreators.R

Defines functions sqlCreate_version createMetaSQL createXMLSQL createSpectraSQL insertIntoMassTable insertIntoIndividualSpectra

Documented in createMetaSQL createSpectraSQL createXMLSQL insertIntoIndividualSpectra insertIntoMassTable sqlCreate_version

# version table -----------------------------------------------------------



#' Create the "version" table when it is missing
#'
#' Looks for a table named "version" on `userDBCon`. If there is none, the
#' `version` element of `tempRepo::sqlTableArchitecture(numberScans = 1)` is
#' written to a new "version" table. An existing table is left untouched.
#'
#' @param userDBCon sqlite connection
#'
#' @return Called for its side effect on the database; the return value
#'   carries no information.
#' @export
#'
sqlCreate_version <- function(userDBCon) {
  # Guard: an existing table means there is nothing to do.
  if (DBI::dbExistsTable(userDBCon, "version")) {
    return(invisible(NULL))
  }

  # Template for the version table, taken from the package's table layout.
  versionRow <- tempRepo::sqlTableArchitecture(numberScans = 1)$version

  DBI::dbWriteTable(conn = userDBCon,
                    name = "version",
                    value = versionRow,
                    append = TRUE,
                    overwrite = FALSE)
}



# metadata table ----------------------------------------------------------



#' Insert a sample ID into the "metaData" table
#'
#' Creates the "metaData" table via `tempRepo::sql_CreatemetaData()` when it
#' does not exist yet, then inserts `sampleID` into its `Strain_ID` column
#' using a parameterised INSERT statement.
#'
#' @param sampleID value(s) bound to the `Strain_ID` placeholder of the INSERT
#' @param userDBCon database connection usable with DBI
#'
#' @return `TRUE`, invisibly; called for its side effect on the database
#' @export
#'

createMetaSQL <- function(sampleID,
                          userDBCon){
  
  if (!DBI::dbExistsTable(userDBCon, "metaData")) {
    tempRepo::sql_CreatemetaData(userDBCon)
  }  
  
  query <- DBI::dbSendStatement(userDBCon, 
                                "INSERT INTO 'metaData' (
                                'Strain_ID')
                                 VALUES (?)")
  # Release the statement even when binding fails, so a failed insert does
  # not leave an open result set on the connection.
  on.exit(DBI::dbClearResult(query), add = TRUE)
  
  DBI::dbBind(query, list(sampleID))
  
  invisible(TRUE)
}



# XML table ---------------------------------------------------------------


#' Store a serialized raw data file and its instrument info in the "XML" table
#'
#' Serializes the file at `rawDataFilePath` with `tempRepo::serializeXML()`,
#' hashes the result with `tempRepo::hashR()`, creates the "XML" table if it
#' does not exist, and inserts one row holding the hash, the serialized file
#' and the instrument fields reported by `mzR::instrumentInfo(mzML_con)`.
#' The `Instrument_MetaFile` column is always written as the literal "Unkown".
#'
#' @param rawDataFilePath path handed to `tempRepo::serializeXML()`
#' @param userDBCon database connection usable with DBI
#' @param mzML_con object accepted by `mzR::instrumentInfo()`
#'
#' @return list with elements `mzMLHash` (hash of the serialized file) and
#'   `mzMLInfo` (the value returned by `mzR::instrumentInfo()`)
#' @export
#'

createXMLSQL <- function(rawDataFilePath,
                         userDBCon,
                         mzML_con){
  
  xmlFile <- tempRepo::serializeXML(rawDataFilePath)
  
  mzMLHash <- tempRepo::hashR(xmlFile)
  
  if (!DBI::dbExistsTable(userDBCon, "XML")) {
    tempRepo::sql_CreatexmlTable(userDBCon)
  }
  
  
  # Get instrument Info
  instInfo <- mzR::instrumentInfo(mzML_con)
  
  # # Find acquisitonInfo from mzML file
  # acquisitonInfo <- tempRepo::findAcquisitionInfo(rawDataFilePath,
  #                                                 instInfo$manufacturer)
  # 
  # if ("Instrument_MetaFile" %in% ls(acquisitonInfo)) { 
  #   sqlDataFrame$XML$Instrument_MetaFile <- tempRepo::serial(acquisitonInfo$Instrument_MetaFile)
  # }
  
  
  query <- DBI::dbSendStatement(userDBCon, 
                                "INSERT INTO 'XML'(
                                'XMLHash',
                                'XML',
                                'manufacturer',
                                'model',
                                'ionization',
                                'analyzer',
                                'detector',
                                'Instrument_MetaFile')
                                VALUES ($XMLHash,
                                $XML,
                                $manufacturer,
                                $model,
                                $ionization,
                                $analyzer,
                                $detector,
                                $Instrument_MetaFile);")
  # Release the statement even when binding fails, so a failed insert does
  # not leave an open result set on the connection.
  on.exit(DBI::dbClearResult(query), add = TRUE)
  
  # The serialized file is wrapped in list() so it is bound as one value.
  # "Unkown" (sic) is kept byte-for-byte because it is stored in the database.
  DBI::dbBind(query, list(XMLHash = mzMLHash,
                          XML = list(xmlFile),
                          manufacturer = instInfo$manufacturer[[1]],
                          model = instInfo$model[[1]],
                          ionization = instInfo$ionisation[[1]],
                          analyzer = instInfo$analyzer[[1]],
                          detector = instInfo$detector[[1]],
                          Instrument_MetaFile = "Unkown"))
  
  return(list(mzMLHash = mzMLHash,
              mzMLInfo = instInfo))
}



#' Import spectra from an mzML connection and write them to the database
#'
#' Reads peaks with `mzR::peaks()`, converts them with
#' `tempRepo::spectrumMatrixToMALDIqaunt()`, and splits the spectra by their
#' maximum mass: spectra whose maximum is below `smallRangeEnd` are processed
#' as "small", all others as "protein". Each non-empty group is processed by
#' `tempRepo::processXMLIndSpectra()` and written through
#' `tempRepo::insertIntoIndividualSpectra()` and
#' `tempRepo::insertIntoMassTable()`. The "IndividualSpectra" and "massTable"
#' tables are created first when missing.
#'
#' @param mzML_con object accepted by `mzR::peaks()`
#' @param scanNumber not used by the function body
#' @param userDBCon database connection usable with DBI
#' @param sampleID sample ID forwarded to `insertIntoIndividualSpectra()`
#' @param XMLinfo forwarded to `insertIntoIndividualSpectra()`
#' @param rawDataFilePath not used by the function body
#' @param smallRangeEnd end of mass region for small mol, if m/z above this- will be classified as "protein" spectrum
#'
#' @return Called for its side effects on the database
#' @export
#'

createSpectraSQL <- function(mzML_con, 
                             scanNumber,
                             userDBCon,
                             sampleID,
                             XMLinfo,
                             rawDataFilePath,
                             smallRangeEnd = 6000){

  spectra <- tempRepo::spectrumMatrixToMALDIqaunt(mzR::peaks(mzML_con))

  # TRUE = small molecule spectrum, FALSE = protein spectrum, decided by the
  # largest value in each spectrum's mass slot.
  topMass <- unlist(lapply(spectra, function(spectrum) max(spectrum@mass)))
  isSmall <- topMass < smallRangeEnd

  # Create tables in DB if they don't exist ---------------------------------

  if (!DBI::dbExistsTable(userDBCon, "IndividualSpectra")) {
    tempRepo::sql_CreateIndividualSpectra(userDBCon)
  }
  if (!DBI::dbExistsTable(userDBCon, "massTable")) {
    tempRepo::sql_CreatemassTable(userDBCon)
  }

  # Process one group of spectra and write it to both tables.
  writeGroup <- function(kind, keep) {
    env <- tempRepo::processXMLIndSpectra(spectraImport = spectra,
                                          smallOrProtein = kind,
                                          index = keep)
    tempRepo::insertIntoIndividualSpectra(env = env,
                                          XMLinfo = XMLinfo,
                                          userDBCon = userDBCon,
                                          acquisitonInfo = NULL,
                                          sampleID = sampleID)
    tempRepo::insertIntoMassTable(env = env,
                                  userDBCon = userDBCon)
  }

  # Small mol spectra -------------------------------------------------------

  if (any(isSmall)) {
    writeGroup("small", isSmall)
  }

  # Protein Spectra ---------------------------------------------------------

  if (any(!isSmall)) {
    writeGroup("protein", !isSmall)
  }
}






#' Write massTable data to SQLite
#'
#' Inserts the `spectrumMassHash` and `massVector` variables of `env` into the
#' "massTable" table using a parameterised INSERT statement. Both variables
#' must have the same length.
#'
#' @param env environment (or list) providing `spectrumMassHash` and
#'   `massVector`
#' @param userDBCon checked database connection
#'
#' @return `TRUE`, invisibly; writes to database. Raises an error when the two
#'   variables differ in length.
#' @export
#'
insertIntoMassTable <- function(env,
                                userDBCon){
  
  # Validate before touching the database so no statement is opened on
  # malformed input.
  if (length(env$spectrumMassHash) != length(env$massVector)) {
    stop("Error in tempRepo::insertIntoMassTable(): tempRepo::processXMLIndSpectra() provided
                    spectrumMassHash and massVector variables with different lengths")
  }
  
  query <- DBI::dbSendStatement(userDBCon, 
                                "INSERT INTO 'massTable'(
                              'spectrumMassHash',
                              'massVector')
                              VALUES (
                              $spectrumMassHash,
                              $massVector);")
  # Release the statement even when binding fails, so a failed insert does
  # not leave an open result set on the connection.
  on.exit(DBI::dbClearResult(query), add = TRUE)
  
  DBI::dbBind(query, list(spectrumMassHash = env$spectrumMassHash,
                          massVector = env$massVector))
  
  invisible(TRUE)
}





#' Write individual spectra to SQLite
#'
#' Inserts one row per spectrum into the "IndividualSpectra" table. The
#' per-spectrum columns (`spectrumMassHash`, `spectrumIntensityHash`,
#' `peakMatrix`, `spectrumIntensity`, `minMass`, `maxMass`) are read from
#' `env`; `XMLHash`, `Strain_ID`, `AcquisitionDate`, `MassError` (always `NA`)
#' and `ignore` (always `0`) are repeated for every row.
#'
#' Every variable in `env` must have the same length; that length is taken as
#' the number of rows to insert.
#'
#' @param env environment (or list) holding the per-spectrum variables
#' @param XMLinfo list with `mzMLHash` and `mzMLInfo`; a missing
#'   `mzMLInfo$AcquisitionDate` is written as `NA`
#' @param userDBCon checked database connection
#' @param acquisitonInfo acquisitonInfo (not used by the function body)
#' @param sampleID sampleID; only the first element is used
#'
#' @return `TRUE`, invisibly; writes to database. Raises an error when `env`
#'   is empty or its variables differ in length.
#' @export
#'
insertIntoIndividualSpectra <- function(env,
                                        XMLinfo,
                                        userDBCon,
                                        acquisitonInfo,
                                        sampleID){
  
  varLengths <- base::lengths(base::mget(base::ls(env),
                                         envir = as.environment(env)))
  
  if (length(varLengths) == 0L) {
    stop("Error in tempRepo::insertIntoIndividualSpectra(): tempRepo::processXMLIndSpectra() provided no variables")
  }
  
  nSpectra <- varLengths[[1]]
  
  # ensure equal lengths
  # Compare every length against the first one directly; a ratio of sums can
  # accept unequal lengths (e.g. 2, 1, 3) and breaks on a zero first length.
  if (any(varLengths != nSpectra)) {
    stop("Error in tempRepo::insertIntoIndividualSpectra(): tempRepo::processXMLIndSpectra() provided variables of differing lengths: \n ",
         paste0(names(varLengths), "=", varLengths, collapse = ", "))
  }
  
  query <- DBI::dbSendStatement(userDBCon, 
                                "INSERT INTO 'IndividualSpectra'(
                                  'spectrumMassHash',
                                  'spectrumIntensityHash',
                                  'XMLHash',
                                  'Strain_ID',
                                  'MassError',
                                  'AcquisitionDate',
                                  'peakMatrix',
                                  'spectrumIntensity',
                                  'minMass',
                                  'maxMass',
                                  'ignore')
                                  VALUES ($spectrumMassHash,
                                  $spectrumIntensityHash,
                                  $XMLHash,
                                  $Strain_ID,
                                  $MassError,
                                  $AcquisitionDate,
                                  $peakMatrix,
                                  $spectrumIntensity,
                                  $minMass,
                                  $maxMass,
                                  $ignore
                                  );"
  )
  # Release the statement even when binding fails, so a failed insert does
  # not leave an open result set on the connection.
  on.exit(DBI::dbClearResult(query), add = TRUE)
  
  if (is.null(XMLinfo$mzMLInfo$AcquisitionDate)) {
    XMLinfo$mzMLInfo$AcquisitionDate <- NA
  }  
  # if (is.null(acquisitonInfo$MassError)) {
  #   acquisitonInfo$MassError <- NA
  # } 
  
  # Per-file values are repeated so every bound parameter has one entry per
  # spectrum.
  mzMLHash <- rep(XMLinfo$mzMLHash, times = nSpectra)
  acquisitionDate <- rep(XMLinfo$mzMLInfo$AcquisitionDate, times = nSpectra)
  MassError <- rep(NA, times = nSpectra)
  ignore <- rep(0, times = nSpectra)
  sampleID <- rep(sampleID[[1]], times = nSpectra)
  
  DBI::dbBind(query, list(spectrumMassHash = env$spectrumMassHash,
                          spectrumIntensityHash = env$spectrumIntensityHash,
                          XMLHash = mzMLHash,
                          Strain_ID = sampleID,
                          MassError = MassError,
                          AcquisitionDate = acquisitionDate,
                          peakMatrix = env$peakMatrix,
                          spectrumIntensity = env$spectrumIntensity,
                          minMass = env$minMass,
                          maxMass = env$maxMass,
                          ignore = ignore
  ))
  
  invisible(TRUE)
}
chasemc/tempRepo documentation built on May 28, 2019, 7:32 p.m.