# tests/testthat/test_get_osw_query.R

# Labels the tests in this file with the "Query" context name.
# NOTE(review): context() is deprecated in testthat 3rd edition — confirm which
# edition this package targets before removing it.
context("Query")

test_that("test_getQuery", {
  # Case 1: oswMerged = TRUE with no analyte list. The expected SQL builds the
  # transition_group_id from sequence and charge and filters on RUN.FILENAME.
  expectedMerged <- "SELECT PEPTIDE.MODIFIED_SEQUENCE || '_' || PRECURSOR.CHARGE AS transition_group_id,\n  RUN.FILENAME AS filename,\n  FEATURE.EXP_RT AS RT,\n  FEATURE.DELTA_RT AS delta_rt,\n  PRECURSOR.LIBRARY_RT AS assay_RT,\n  FEATURE_MS2.AREA_INTENSITY AS Intensity,\n  FEATURE.LEFT_WIDTH AS leftWidth,\n  FEATURE.RIGHT_WIDTH AS rightWidth,\n  SCORE_MS2.RANK AS peak_group_rank,\n  SCORE_MS2.QVALUE AS m_score,\n  TRANSITION_PRECURSOR_MAPPING.TRANSITION_ID AS transition_id\n  FROM PRECURSOR\n  INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID AND PRECURSOR.DECOY=0\n  INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID\n  INNER JOIN FEATURE ON FEATURE.PRECURSOR_ID = PRECURSOR.ID\n  INNER JOIN RUN ON RUN.ID = FEATURE.RUN_ID\n  INNER JOIN TRANSITION_PRECURSOR_MAPPING ON TRANSITION_PRECURSOR_MAPPING.PRECURSOR_ID = PRECURSOR.ID\n  LEFT JOIN FEATURE_MS2 ON FEATURE_MS2.FEATURE_ID = FEATURE.ID\n  LEFT JOIN SCORE_MS2 ON SCORE_MS2.FEATURE_ID = FEATURE.ID\n  WHERE SCORE_MS2.QVALUE < 0.03 AND RUN.FILENAME ='DIAlignR/testFile.mzML'\n  ORDER BY transition_group_id,\n  peak_group_rank;"
  queryMerged <- getQuery(
    maxFdrQuery = 0.03,
    oswMerged = TRUE,
    analytes = NULL,
    filename = "DIAlignR/testFile.mzML",
    runType = "DIA_Proteomics",
    analyteInGroupLabel = FALSE
  )
  expect_identical(queryMerged, expectedMerged)

  # Case 2: oswMerged = FALSE with an explicit analyte list and
  # analyteInGroupLabel = TRUE. The expected SQL selects PRECURSOR.GROUP_LABEL
  # and restricts transition_group_id to the requested analytes.
  requestedAnalytes <- c(
    "AAAM(UniMod:35)SILDK_3",
    "AAM(UniMod:35)GGAM(UniMod:35)VLLY_2"
  )
  expectedFiltered <- "SELECT PRECURSOR.GROUP_LABEL AS transition_group_id,\n  RUN.FILENAME AS filename,\n  FEATURE.EXP_RT AS RT,\n  FEATURE.DELTA_RT AS delta_rt,\n  PRECURSOR.LIBRARY_RT AS assay_RT,\n  FEATURE_MS2.AREA_INTENSITY AS Intensity,\n  FEATURE.LEFT_WIDTH AS leftWidth,\n  FEATURE.RIGHT_WIDTH AS rightWidth,\n  SCORE_MS2.RANK AS peak_group_rank,\n  SCORE_MS2.QVALUE AS m_score,\n  TRANSITION_PRECURSOR_MAPPING.TRANSITION_ID AS transition_id\n  FROM PRECURSOR\n  INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID AND PRECURSOR.DECOY=0\n  INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID\n  INNER JOIN FEATURE ON FEATURE.PRECURSOR_ID = PRECURSOR.ID\n  INNER JOIN RUN ON RUN.ID = FEATURE.RUN_ID\n  INNER JOIN TRANSITION_PRECURSOR_MAPPING ON TRANSITION_PRECURSOR_MAPPING.PRECURSOR_ID = PRECURSOR.ID\n  LEFT JOIN FEATURE_MS2 ON FEATURE_MS2.FEATURE_ID = FEATURE.ID\n  LEFT JOIN SCORE_MS2 ON SCORE_MS2.FEATURE_ID = FEATURE.ID\n  WHERE SCORE_MS2.QVALUE < 5e-04 AND transition_group_id IN ('AAAM(UniMod:35)SILDK_3','AAM(UniMod:35)GGAM(UniMod:35)VLLY_2')\n  ORDER BY transition_group_id,\n  peak_group_rank;"
  queryFiltered <- getQuery(
    maxFdrQuery = 0.0005,
    oswMerged = FALSE,
    analytes = requestedAnalytes,
    filename = "DIAlignR/testFile.mzML",
    runType = "DIA_Proteomics",
    analyteInGroupLabel = TRUE
  )
  expect_identical(queryFiltered, expectedFiltered)
})

test_that("test_getAnalytesQuery",{
  # Checks the SQL text returned by getAnalytesQuery() for two configurations.
  # (The previously defined `runs` vector was never used and has been removed.)

  # Case 1: metabolomics run type with oswMerged = TRUE. The expected SQL reads
  # from the COMPOUND tables and filters on RUN.FILENAME.
  outData <- getAnalytesQuery(maxFdrQuery = 0.03, oswMerged = TRUE, filename = "DIAlignR/testFile.mzML",
                              runType = "DIA_Metabolomics", analyteInGroupLabel = FALSE)
  expOutput <- "SELECT COMPOUND.ID AS compound_id,\n    COMPOUND.COMPOUND_NAME || '_' || COMPOUND.ADDUCTS AS transition_group_id,\n    RUN.FILENAME AS filename,\n    SCORE_MS2.RANK AS peak_group_rank,\n    SCORE_MS2.QVALUE AS m_score\n    FROM PRECURSOR\n    INNER JOIN PRECURSOR_COMPOUND_MAPPING ON PRECURSOR.ID = PRECURSOR_COMPOUND_MAPPING.PRECURSOR_ID\n    INNER JOIN COMPOUND ON PRECURSOR_COMPOUND_MAPPING.COMPOUND_ID = COMPOUND.ID\n    INNER JOIN FEATURE ON FEATURE.PRECURSOR_ID = PRECURSOR.ID\n    INNER JOIN RUN ON RUN.ID = FEATURE.RUN_ID\n    LEFT JOIN SCORE_MS2 ON SCORE_MS2.FEATURE_ID = FEATURE.ID\n    WHERE COMPOUND.DECOY = 0 AND SCORE_MS2.QVALUE <  0.03 AND RUN.FILENAME ='DIAlignR/testFile.mzML'\n    ORDER BY transition_group_id,\n    peak_group_rank;"
  expect_identical(outData, expOutput)

  # Case 2: proteomics run type with oswMerged = FALSE and
  # analyteInGroupLabel = TRUE. The expected SQL selects PRECURSOR.GROUP_LABEL
  # and carries no filename filter.
  outData <- getAnalytesQuery(maxFdrQuery = 0.0005, oswMerged = FALSE, filename = "DIAlignR/testFile.mzML",
                              runType = "DIA_Proteomics", analyteInGroupLabel = TRUE)
  expOutput <- "SELECT PRECURSOR.GROUP_LABEL AS transition_group_id,\n  RUN.FILENAME AS filename,\n  SCORE_MS2.RANK AS peak_group_rank,\n  SCORE_MS2.QVALUE AS m_score,\n  TRANSITION_PRECURSOR_MAPPING.TRANSITION_ID AS transition_id\n  FROM PRECURSOR\n  INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID AND PRECURSOR.DECOY=0\n  INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID\n  INNER JOIN FEATURE ON FEATURE.PRECURSOR_ID = PRECURSOR.ID\n  INNER JOIN RUN ON RUN.ID = FEATURE.RUN_ID\n  INNER JOIN TRANSITION_PRECURSOR_MAPPING ON TRANSITION_PRECURSOR_MAPPING.PRECURSOR_ID = PRECURSOR.ID\n  LEFT JOIN SCORE_MS2 ON SCORE_MS2.FEATURE_ID = FEATURE.ID\n  WHERE SCORE_MS2.QVALUE < 5e-04\n  ORDER BY transition_group_id,\n  peak_group_rank;"
  expect_identical(outData, expOutput)
})

test_that("test_getPrecursorsQuery", {
  # Proteomics run type: the expected SQL joins precursor, transition mapping
  # and peptide tables, keeping peptides that pass the SCORE_PEPTIDE sub-select.
  expectedSql <- "SELECT DISTINCT PRECURSOR.ID AS transition_group_id,
      TRANSITION_PRECURSOR_MAPPING.TRANSITION_ID AS transition_id,
      PEPTIDE.ID AS peptide_id,
      PEPTIDE.MODIFIED_SEQUENCE AS sequence,
      PRECURSOR.CHARGE AS charge,
      PRECURSOR.GROUP_LABEL AS group_label
      FROM PRECURSOR
      INNER JOIN TRANSITION_PRECURSOR_MAPPING ON TRANSITION_PRECURSOR_MAPPING.PRECURSOR_ID = PRECURSOR.ID
      INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID = PRECURSOR.ID
      INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID
      INNER JOIN (
      SELECT PEPTIDE_ID
      FROM SCORE_PEPTIDE
      WHERE SCORE_PEPTIDE.CONTEXT = $CONTEXT AND SCORE_PEPTIDE.QVALUE < $FDR
      ) AS SCORE_PEPTIDE ON SCORE_PEPTIDE.PEPTIDE_ID = PEPTIDE.ID
      WHERE PRECURSOR.DECOY = 0
      ORDER BY peptide_id, transition_group_id, transition_id;"
  actualSql <- getPrecursorsQuery(runType = "DIA_Proteomics")
  expect_identical(actualSql, expectedSql)

  # IPF run type: the expected SQL additionally joins a TRANSITION sub-select
  # filtered on the DETECTING / IDENTIFYING flags.
  expectedSqlIpf <- "SELECT DISTINCT PRECURSOR.ID AS transition_group_id,
      TRANSITION_PRECURSOR_MAPPING.TRANSITION_ID AS transition_id,
      PEPTIDE.ID AS peptide_id,
      PEPTIDE.MODIFIED_SEQUENCE AS sequence,
      PRECURSOR.CHARGE AS charge,
      PRECURSOR.GROUP_LABEL AS group_label
      --TRANSITION.DETECTING AS detecting,
	    --TRANSITION.IDENTIFYING AS identifying
      FROM PRECURSOR
      INNER JOIN TRANSITION_PRECURSOR_MAPPING ON TRANSITION_PRECURSOR_MAPPING.PRECURSOR_ID = PRECURSOR.ID
      INNER JOIN (
      SELECT *
      FROM TRANSITION
      WHERE (
      TRANSITION.DETECTING=TRUE
      OR TRANSITION.IDENTIFYING=$USE_IDENTIFYING --- #identifying
          )
      ) AS TRANSITION ON TRANSITION.ID = TRANSITION_PRECURSOR_MAPPING.TRANSITION_ID
      INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID = PRECURSOR.ID
      INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID
      INNER JOIN (
      SELECT PEPTIDE_ID
      FROM SCORE_PEPTIDE
      WHERE SCORE_PEPTIDE.CONTEXT = $CONTEXT AND SCORE_PEPTIDE.QVALUE < $FDR
      ) AS SCORE_PEPTIDE ON SCORE_PEPTIDE.PEPTIDE_ID = PEPTIDE.ID
      WHERE PRECURSOR.DECOY = 0
      ORDER BY peptide_id, transition_group_id, transition_id;"
  actualSqlIpf <- getPrecursorsQuery(runType = "DIA_IPF")
  expect_identical(actualSqlIpf, expectedSqlIpf)
})

test_that("test_getFeaturesQuery", {
  # Proteomics run type: features for one run ($runID), with MS2 intensities
  # and MS2 scores below the $FDR placeholder.
  expectedSql <- "SELECT PRECURSOR.ID AS transition_group_id,
    FEATURE.ID AS feature_id,
    FEATURE.EXP_RT AS RT,
    FEATURE_MS2.AREA_INTENSITY AS intensity,
    FEATURE.LEFT_WIDTH AS leftWidth,
    FEATURE.RIGHT_WIDTH AS rightWidth,
    SCORE_MS2.RANK AS peak_group_rank,
    SCORE_MS2.QVALUE AS m_score
    FROM PRECURSOR
    INNER JOIN FEATURE ON FEATURE.PRECURSOR_ID = PRECURSOR.ID
    INNER JOIN RUN ON RUN.ID = FEATURE.RUN_ID
    LEFT JOIN (
        SELECT FEATURE_ID, AREA_INTENSITY
        FROM FEATURE_MS2
    ) AS FEATURE_MS2 ON FEATURE_MS2.FEATURE_ID = FEATURE.ID
    INNER JOIN (
        SELECT FEATURE_ID, RANK, QVALUE
        FROM SCORE_MS2
        WHERE SCORE_MS2.QVALUE < $FDR
        ) AS SCORE_MS2 ON SCORE_MS2.FEATURE_ID = FEATURE.ID
    WHERE PRECURSOR.DECOY = 0 AND RUN.ID = $runID
    ORDER BY transition_group_id, peak_group_rank;"
  actualSql <- getFeaturesQuery(runType = "DIA_Proteomics")
  expect_identical(actualSql, expectedSql)

  # IPF run type: the expected SQL also joins PRECURSOR_PEPTIDE_MAPPING and a
  # SCORE_IPF sub-select; the MS2 q-value is reported as ms2_m_score and the
  # IPF q-value as m_score.
  expectedSqlIpf <- "SELECT PRECURSOR.ID AS transition_group_id,
    FEATURE.ID AS feature_id,
    FEATURE.EXP_RT AS RT,
    FEATURE_MS2.AREA_INTENSITY AS intensity,
    FEATURE.LEFT_WIDTH AS leftWidth,
    FEATURE.RIGHT_WIDTH AS rightWidth,
    SCORE_MS2.RANK AS peak_group_rank,
    SCORE_MS2.QVALUE AS ms2_m_score,
    SCORE_IPF.QVALUE AS m_score
    FROM PRECURSOR
    INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID = PRECURSOR.ID
    INNER JOIN FEATURE ON FEATURE.PRECURSOR_ID = PRECURSOR.ID
    INNER JOIN RUN ON RUN.ID = FEATURE.RUN_ID
    LEFT JOIN (
        SELECT FEATURE_ID, AREA_INTENSITY
        FROM FEATURE_MS2
    ) AS FEATURE_MS2 ON FEATURE_MS2.FEATURE_ID = FEATURE.ID
    INNER JOIN (
        SELECT FEATURE_ID, RANK, QVALUE
        FROM SCORE_MS2
        WHERE SCORE_MS2.QVALUE < $FDR
        ) AS SCORE_MS2 ON SCORE_MS2.FEATURE_ID = FEATURE.ID
    INNER JOIN (
        SELECT FEATURE_ID, QVALUE, PEPTIDE_ID
        FROM SCORE_IPF
        WHERE SCORE_IPF.QVALUE < $IPF_FDR
        ) AS SCORE_IPF ON SCORE_IPF.FEATURE_ID = FEATURE.ID
    WHERE PRECURSOR.DECOY = 0 AND RUN.ID = $runID
    AND SCORE_IPF.PEPTIDE_ID+1 = PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID
    ORDER BY transition_group_id, peak_group_rank;"
  actualSqlIpf <- getFeaturesQuery(runType = "DIA_IPF")
  expect_identical(actualSqlIpf, expectedSqlIpf)
})

test_that("test_getPrecursorsQueryID", {
  # The two integer IDs passed in are expected to appear as quoted values in
  # the IN (...) clause of the generated SQL.
  precursorIds <- c(32L, 43L)
  expectedSql <- "SELECT PRECURSOR.ID AS transition_group_id,
                  TRANSITION_PRECURSOR_MAPPING.TRANSITION_ID AS transition_id,
                  PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID AS peptide_id,
                  PEPTIDE.MODIFIED_SEQUENCE AS sequence,
                  PRECURSOR.CHARGE AS charge,
                  PRECURSOR.GROUP_LABEL AS group_label
                  FROM PRECURSOR
                  INNER JOIN TRANSITION_PRECURSOR_MAPPING ON TRANSITION_PRECURSOR_MAPPING.PRECURSOR_ID = PRECURSOR.ID
                  INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID = PRECURSOR.ID
                  INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID
                  WHERE  transition_group_id IN ('32','43')
                  ORDER BY peptide_id, transition_group_id, transition_id;"
  actualSql <- getPrecursorsQueryID(precursorIds, runType = "DIA_Proteomics")
  expect_identical(actualSql, expectedSql)
})

test_that("test_getTransitionsQuery", {
  # Proteomics run type: the expected SQL takes intensities from
  # FEATURE_TRANSITION and adds TRANSITION_ID as the last ordering key.
  expectedSql <- "SELECT PRECURSOR.ID AS transition_group_id,
  FEATURE.ID AS feature_id,
  FEATURE.EXP_RT AS RT,
  FEATURE_TRANSITION.AREA_INTENSITY AS intensity,
  FEATURE.LEFT_WIDTH AS leftWidth,
  FEATURE.RIGHT_WIDTH AS rightWidth,
  SCORE_MS2.RANK AS peak_group_rank,
  SCORE_MS2.QVALUE AS m_score
  FROM PRECURSOR
  INNER JOIN FEATURE ON FEATURE.PRECURSOR_ID = PRECURSOR.ID
  INNER JOIN RUN ON RUN.ID = FEATURE.RUN_ID
  LEFT JOIN (
    SELECT FEATURE_ID, TRANSITION_ID, AREA_INTENSITY
    FROM FEATURE_TRANSITION
    ) AS FEATURE_TRANSITION ON FEATURE.ID = FEATURE_TRANSITION.FEATURE_ID
  INNER JOIN (
      SELECT FEATURE_ID, RANK, QVALUE
      FROM SCORE_MS2
      WHERE SCORE_MS2.QVALUE < $FDR
      ) AS SCORE_MS2 ON SCORE_MS2.FEATURE_ID = FEATURE.ID
  WHERE PRECURSOR.DECOY = 0 AND RUN.ID = $runID
  ORDER BY transition_group_id, peak_group_rank, FEATURE_TRANSITION.TRANSITION_ID;"
  actualSql <- getTransitionsQuery(runType = "DIA_Proteomics")
  expect_identical(actualSql, expectedSql)
})
# shubham1637/DIAlign documentation built on March 27, 2023, 7:12 a.m.