tests/testthat/test-4-moduleCohortRelationship.R

test_that("Testing executeCohortRelationshipDiagnostics", {
  skip_if(skipCdmTests, "cdm settings not configured")

  # manually create cohort table and load to table
  # for the logic to work - there has to be some overlap of the comparator cohort over target cohort
  # note - we will not be testing offset in this test. it is expected to work as it is a simple substraction

  temporalStartDays <- c(0)
  temporalEndDays <- c(0)

  targetCohort <- dplyr::tibble(
    cohortDefinitionId = c(1),
    subjectId = c(1),
    cohortStartDate = c(as.Date("1900-01-15")),
    cohortEndDate = c(as.Date("1900-01-31"))
  ) # target cohort always one row

  comparatorCohort <- # all records here overlap with targetCohort
    dplyr::tibble(
      cohortDefinitionId = c(10, 10, 10),
      subjectId = c(1, 1, 1),
      cohortStartDate = c(
        as.Date("1900-01-01"),
        # starts before target cohort start
        as.Date("1900-01-22"),
        # starts during target cohort period and ends during target cohort period
        as.Date("1900-01-31")
      ),
      cohortEndDate = c(
        as.Date("1900-01-20"),
        as.Date("1900-01-29"),
        as.Date("1900-01-31")
      )
    )

  cohort <- dplyr::bind_rows(
    targetCohort,
    comparatorCohort,
    targetCohort %>%
      dplyr::mutate(cohortDefinitionId = 2),
    comparatorCohort %>%
      dplyr::mutate(cohortDefinitionId = 20)
  )

  connectionCohortRelationship <-
    DatabaseConnector::connect(connectionDetails)

  # to do - with incremental = FALSE
  with_dbc_connection(connectionCohortRelationship, {
    sysTime <- as.numeric(Sys.time()) * 100000
    tableName <- paste0("cr", sysTime)
    observationTableName <- paste0("op", sysTime)

    DatabaseConnector::insertTable(
      connection = connectionCohortRelationship,
      databaseSchema = cohortDatabaseSchema,
      tableName = tableName,
      data = cohort,
      dropTableIfExists = TRUE,
      createTable = TRUE,
      tempTable = FALSE,
      camelCaseToSnakeCase = TRUE,
      progressBar = FALSE
    )

    cohortDefinitionSet <-
      cohort %>%
      dplyr::select(cohortDefinitionId) %>%
      dplyr::distinct() %>%
      dplyr::rename("cohortId" = "cohortDefinitionId") %>%
      dplyr::rowwise() %>%
      dplyr::mutate(json = RJSONIO::toJSON(list(
        cohortId = cohortId,
        randomString = c(
          sample(x = LETTERS, 5, replace = TRUE),
          sample(x = LETTERS, 4, replace = TRUE),
          sample(LETTERS, 1, replace = TRUE)
        )
      ))) %>%
      dplyr::ungroup() %>%
      dplyr::mutate(
        sql = json,
        checksum = CohortDiagnostics:::computeChecksum(json)
      )


    exportFolder <- tempdir()
    exportFile <- tempfile()

    unlink(
      x = exportFolder,
      recursive = TRUE,
      force = TRUE
    )
    dir.create(
      path = exportFolder,
      showWarnings = FALSE,
      recursive = TRUE
    )

    CohortDiagnostics:::executeCohortRelationshipDiagnostics(
      connection = connectionCohortRelationship,
      databaseId = "testDataSourceName",
      exportFolder = exportFolder,
      cohortDatabaseSchema = cohortDatabaseSchema,
      cdmDatabaseSchema = cdmDatabaseSchema,
      cohortTable = tableName,
      tempEmulationSchema = NULL,
      cohortDefinitionSet = cohortDefinitionSet %>%
        dplyr::filter(cohortId %in% c(1, 10)),
      temporalCovariateSettings = list(
        temporalStartDays = c(-365, -30),
        temporalEndDays = c(-31, -1)
      ),
      minCellCount = 0,
      recordKeepingFile = paste0(exportFile, "recordKeeping"),
      incremental = TRUE,
      batchSize = 2
    )

    recordKeepingFileData <-
      readr::read_csv(
        file = paste0(exportFile, "recordKeeping"),
        col_types = readr::cols()
      )

    # testing if check sum if written to field called targetChecksum
    testthat::expect_true("targetChecksum" %in% colnames(recordKeepingFileData))
    testthat::expect_true("comparatorChecksum" %in% colnames(recordKeepingFileData))
    testthat::expect_true("checksum" %in% colnames(recordKeepingFileData))

    testthat::expect_equal(
      object = recordKeepingFileData %>%
        dplyr::filter(cohortId == 1) %>%
        dplyr::filter(comparatorId == 10) %>%
        dplyr::select(checksum) %>%
        dplyr::pull(checksum),
      expected = recordKeepingFileData %>%
        dplyr::filter(cohortId == 1) %>%
        dplyr::filter(comparatorId == 10) %>%
        dplyr::mutate(
          checksum2 = paste0(
            targetChecksum,
            comparatorChecksum
          )
        ) %>%
        dplyr::pull(checksum2)
    )



    ## testing if subset works
    allCohortIds <- cohortDefinitionSet %>%
      dplyr::filter(cohortId %in% c(1, 10, 2)) %>%
      dplyr::select(cohortId, checksum) %>%
      dplyr::rename(
        targetCohortId = cohortId,
        targetChecksum = checksum
      ) %>%
      dplyr::distinct()

    combinationsOfPossibleCohortRelationships <- allCohortIds %>%
      tidyr::crossing(
        allCohortIds %>%
          dplyr::rename(
            comparatorCohortId = targetCohortId,
            comparatorChecksum = targetChecksum
          )
      ) %>%
      dplyr::filter(targetCohortId != comparatorCohortId) %>%
      dplyr::arrange(targetCohortId, comparatorCohortId) %>%
      dplyr::mutate(checksum = paste0(targetChecksum, comparatorChecksum))

    subset <- CohortDiagnostics:::subsetToRequiredCombis(
      combis = combinationsOfPossibleCohortRelationships,
      task = "runCohortRelationship",
      incremental = TRUE,
      recordKeepingFile = paste0(exportFile, "recordKeeping")
    ) %>% dplyr::tibble()

    ### subset should not have the combinations in record keeping file
    shouldBeDfOfZeroRows <- subset %>%
      dplyr::inner_join(
        recordKeepingFileData %>%
          dplyr::select(
            "cohortId",
            "comparatorId"
          ) %>%
          dplyr::distinct() %>%
          dplyr::rename(
            targetCohortId = "cohortId",
            comparatorCohortId = "comparatorId"
          ),
        by = c("targetCohortId", "comparatorCohortId")
      )

    testthat::expect_equal(
      object = nrow(shouldBeDfOfZeroRows),
      expected = 0,
      info = "Looks like subset and record keeping file did not match."
    )


    ## running again by adding cohort 2, to previously run 1 and 10
    CohortDiagnostics:::executeCohortRelationshipDiagnostics(
      connection = connectionCohortRelationship,
      databaseId = "testDataSourceName",
      exportFolder = exportFolder,
      cohortDatabaseSchema = cohortDatabaseSchema,
      cdmDatabaseSchema = cdmDatabaseSchema,
      cohortTable = tableName,
      tempEmulationSchema = NULL,
      cohortDefinitionSet = cohortDefinitionSet %>%
        dplyr::filter(cohortId %in% c(1, 10, 2)),
      temporalCovariateSettings = list(
        temporalStartDays = c(-365, -30),
        temporalEndDays = c(-31, -1)
      ),
      minCellCount = 0,
      recordKeepingFile = paste0(exportFile, "recordKeeping"),
      incremental = TRUE,
      batchSize = 2
    )

    recordKeepingFileData2 <-
      readr::read_csv(
        file = paste0(exportFile, "recordKeeping"),
        col_types = readr::cols()
      )
    # record keeping file should have 6 combinations - for 3 cohorts
    testthat::expect_equal(
      object = nrow(recordKeepingFileData2),
      expected = 3 * 2 * 1
    )

    # record keeping file should have 4 additional combinations
    testthat::expect_equal(
      object = recordKeepingFileData2 %>%
        dplyr::anti_join(
          recordKeepingFileData %>%
            dplyr::select(
              cohortId,
              comparatorId
            ),
          by = c("cohortId", "comparatorId")
        ) %>%
        nrow(),
      expected = 4
    )


    # check what happens for an unrelated cohort combination
    allCohortIds <- cohortDefinitionSet %>%
      dplyr::filter(cohortId %in% c(2, 20)) %>%
      dplyr::select(cohortId, checksum) %>%
      dplyr::rename(
        targetCohortId = cohortId,
        targetChecksum = checksum
      ) %>%
      dplyr::distinct()

    combinationsOfPossibleCohortRelationships <- allCohortIds %>%
      tidyr::crossing(
        allCohortIds %>%
          dplyr::rename(
            comparatorCohortId = targetCohortId,
            comparatorChecksum = targetChecksum
          )
      ) %>%
      dplyr::filter(targetCohortId != comparatorCohortId) %>%
      dplyr::arrange(targetCohortId, comparatorCohortId) %>%
      dplyr::mutate(checksum = paste0(targetChecksum, comparatorChecksum))

    subset <- CohortDiagnostics:::subsetToRequiredCombis(
      combis = combinationsOfPossibleCohortRelationships,
      task = "runCohortRelationship",
      incremental = TRUE,
      recordKeepingFile = paste0(exportFile, "recordKeeping")
    ) %>% dplyr::tibble()

    ### subset should be two rows in subsets that are not in record keeping file
    shouldBeTwoRows <- subset %>%
      dplyr::anti_join(
        recordKeepingFileData2 %>%
          dplyr::select(
            "cohortId",
            "comparatorId"
          ) %>%
          dplyr::rename(
            targetCohortId = cohortId,
            comparatorCohortId = comparatorId
          ),
        by = c("targetCohortId", "comparatorCohortId")
      )

    testthat::expect_equal(
      object = nrow(shouldBeTwoRows),
      expected = 2,
      info = "Looks like subset and record keeping file did not match, Two new cohorts should have run."
    )
  })
})





test_that("Testing cohort relationship logic - incremental FALSE", {
  skip_if(skipCdmTests, "cdm settings not configured")

  # manually create cohort table and load to table
  # for the logic to work - there has to be some overlap of the comparator cohort over target cohort
  # note - we will not be testing offset in this test. it is expected to work as it is a simple substraction

  temporalStartDays <- c(0)
  temporalEndDays <- c(0)

  targetCohort <- dplyr::tibble(
    cohortDefinitionId = c(1),
    subjectId = c(1),
    cohortStartDate = c(as.Date("1900-01-15")),
    cohortEndDate = c(as.Date("1900-01-31"))
  ) # target cohort always one row

  comparatorCohort <- # all records here overlap with targetCohort
    dplyr::tibble(
      cohortDefinitionId = c(10, 10, 10),
      subjectId = c(1, 1, 1),
      cohortStartDate = c(
        as.Date("1900-01-01"), # starts before target cohort start
        as.Date("1900-01-22"), # starts during target cohort period and ends during target cohort period
        as.Date("1900-01-31")
      ),
      cohortEndDate = c(
        as.Date("1900-01-20"),
        as.Date("1900-01-29"),
        as.Date("1900-01-31")
      )
    )

  cohort <- dplyr::bind_rows(targetCohort, comparatorCohort)

  connectionCohortRelationship <-
    DatabaseConnector::connect(connectionDetails)

  # to do - with incremental = FALSE
  with_dbc_connection(connectionCohortRelationship, {
    sysTime <- as.numeric(Sys.time()) * 100000
    tableName <- paste0("cr", sysTime)
    observationTableName <- paste0("op", sysTime)

    DatabaseConnector::insertTable(
      connection = connectionCohortRelationship,
      databaseSchema = cohortDatabaseSchema,
      tableName = tableName,
      data = cohort,
      dropTableIfExists = TRUE,
      createTable = TRUE,
      tempTable = FALSE,
      camelCaseToSnakeCase = TRUE,
      progressBar = FALSE
    )

    cohortRelationship <- runCohortRelationshipDiagnostics(
      connection = connectionCohortRelationship,
      cohortDatabaseSchema = cohortDatabaseSchema,
      cohortTable = tableName,
      targetCohortIds = c(1),
      comparatorCohortIds = c(10),
      relationshipDays = dplyr::tibble(
        startDay = temporalStartDays,
        endDay = temporalEndDays
      )
    )

    sqlDrop <-
      "IF OBJECT_ID('@cohort_database_schema.@cohort_relationship_cohort_table', 'U') IS NOT NULL
            DROP TABLE @cohort_database_schema.@cohort_relationship_cohort_table;"
    DatabaseConnector::renderTranslateExecuteSql(
      connection = connectionCohortRelationship,
      sql = sqlDrop,
      cohort_database_schema = cohortDatabaseSchema,
      cohort_relationship_cohort_table = tableName,
      profile = FALSE,
      progressBar = FALSE
    )

    cohortRelationshipT1C10 <- cohortRelationship %>%
      dplyr::filter(cohortId == 1) %>%
      dplyr::filter(comparatorCohortId == 10)

    testthat::expect_equal(
      object = cohortRelationshipT1C10$subCsBeforeTs,
      expected = 1
    ) # there is one subject in comparator that starts before target

    testthat::expect_equal(
      object = cohortRelationshipT1C10$subCsBeforeTe,
      expected = 1
    ) # there is one subject in comparator that starts before target end

    testthat::expect_equal(
      object = cohortRelationshipT1C10$subCsAfterTs,
      expected = 1
    ) # there is one subject in comparator that starts after target start

    testthat::expect_equal(
      object = cohortRelationshipT1C10$subCsAfterTs,
      expected = 1
    ) # there is one subject in comparator that starts after target start

    testthat::expect_equal(
      object = cohortRelationshipT1C10$subCsOnTe,
      expected = 1
    ) # there is one subject in comparator that starts on target end

    testthat::expect_equal(
      object = cohortRelationshipT1C10$subCsWindowT,
      expected = 1
    ) # there is one subject in comparator that started within the window of Target cohort

    testthat::expect_equal(
      object = cohortRelationshipT1C10$subCeWindowT,
      expected = 1
    ) # there is one subject in comparator that ended within the window of Target cohort
  })
})
OHDSI/CohortDiagnostics documentation built on Oct. 9, 2024, 5:46 a.m.