tests/testthat/test-db-example-analysis.R

prepare_cdm <- function(con, write_schema) {

  # eunomia cdm
  eunomia_con <- DBI::dbConnect(duckdb::duckdb(eunomiaDir()))
  eunomia_cdm <- cdmFromCon(eunomia_con, cdmName = "eunomia", cdmSchema = "main", writeSchema = "main") %>%
    cdmSelect("person", "observation_period")

  cdm <- copyCdmTo(con = con,
                   cdm = eunomia_cdm,
                   schema = write_schema,
                   overwrite = TRUE)

  DBI::dbDisconnect(eunomia_con, shutdown = TRUE)

  return(cdm)
}

check_counts <- function(cdm) {
  nm <- cdm$person %>%
    dplyr::count() %>%
    dplyr::collect() %>%
    colnames()

  expect_true("n" %in% nm)

  nm <- cdm$person %>%
    dplyr::group_by(.data$gender_concept_id, .data$month_of_birth) %>%
    dplyr::count() %>%
    dplyr::collect() %>%
    colnames()

   expect_true(all(c("gender_concept_id", "month_of_birth", "n") %in% nm))
}

check_mutate <- function(cdm) {

  expect_true(unique(cdm$person %>%
    dplyr::mutate(new_var = 1L) %>%
    dplyr::pull("new_var")) == 1L)

  expect_true(unique(cdm$person %>%
                       dplyr::mutate(new_var = 1.55) %>%
                       dplyr::pull("new_var")) == 1.55)

  expect_true(all(c("F","M") %in%
   unique(cdm$person %>%
   dplyr::mutate(new_var = dplyr::if_else(gender_concept_id == 8532, "F", "M")) %>%
   dplyr::pull("new_var"))))

 expect_no_error(cdm$observation_period %>%
    dplyr::mutate(observation_period_start_date = as.Date(observation_period_start_date),
                  new_date = as.Date("2000-01-01"),
                  missing_date = as.Date(NA)))

 expect_no_error({
   cdm$person %>%
    dplyr::mutate(gender_concept_id = as.integer(gender_concept_id))
 })

 # expect_no_error(cdm$person %>%
 #    dplyr::mutate(year_1 = as.integer(1990),
 #                  month_1 = as.integer(01),
 #                  day_1 = as.integer(01)) %>%
 #    dplyr::mutate(date_1=!!asDate(paste0(
 #      year_1, "/", month_1, "/", day_1))) %>%
 #    dplyr::select(date_1))
}

check_summarise_dplyr <- function(cdm) {

  expect_true(all(c("min",  "mean", "max") %in%
                   colnames(cdm$person %>%
                             dplyr::summarise(min = min(year_of_birth, na.rm = TRUE),
                                              mean = mean(year_of_birth, na.rm = TRUE),
                                              max = max(year_of_birth, na.rm = TRUE),
                                              .groups = "drop") %>%
                               dplyr::collect())))


  nms <- cdm$person %>%
    dplyr::group_by(gender_concept_id, month_of_birth) %>%
    dplyr::summarise(min = min(year_of_birth, na.rm = TRUE),
                     mean = mean(year_of_birth, na.rm = TRUE),
                     max = max(year_of_birth, na.rm = TRUE),
                     .groups = "drop") %>%
    dplyr::ungroup() %>%
    dplyr::collect() %>%
    colnames()

  expect_true(all(c("gender_concept_id", "month_of_birth", "min",  "mean", "max") %in% nms))
}

check_summarise_quantiles <- function(cdm){

  expect_true(all(c("p0_quant","p20_quant","p40_quant","p50_quant",
                    "p60_quant","p80_quant","p100_quant") %in%
                    colnames(
  cdm$person %>%
   summariseQuantile(year_of_birth,
                      probs = c(0, 0.2, 0.4, 0.5, 0.6, 0.8, 1),
                      nameSuffix = "quant") %>%
   dplyr::collect())))

  expect_true(all(c("gender_concept_id", "month_of_birth",
                    "p0_quant","p20_quant","p40_quant","p50_quant",
                    "p60_quant","p80_quant","p100_quant") %in%
                    colnames(
  cdm$person %>%
    dplyr::group_by(gender_concept_id, month_of_birth) %>%
    summariseQuantile(year_of_birth,
                      probs = c(0, 0.2, 0.4, 0.5, 0.6, 0.8, 1),
                      nameSuffix = "quant")  %>%
    dplyr::collect())))

}

check_joins <- function(cdm){

  expect_no_error(cdm$person %>%
  dplyr::left_join(cdm$observation_period,
    by = "person_id"
  ))

  expect_no_error(cdm$person %>%
    dplyr::inner_join(cdm$observation_period,
                     by = "person_id"))

    expect_no_error( cdm$person %>%
    dplyr::full_join(cdm$observation_period,
                      by = "person_id"))

    expect_no_error(cdm$person %>%
    dplyr::anti_join(cdm$observation_period,
                     by = "person_id"))

}

check_dates <- function(cdm){

  d1 <- cdm$observation_period %>%
    dplyr::mutate(date_var = !!dateadd("observation_period_start_date",
                                       1,
                                       interval = "year"
    )) %>%
    dplyr::pull("date_var")
  expect_true(inherits(d1, c("Date", "POSIXt")))

  d2 <-  cdm$observation_period %>%
    dplyr::mutate(date_var = !!dateadd("observation_period_start_date",
                                       1,
                                       interval = "day"
    )) %>%
    dplyr::pull("date_var")
  expect_true(inherits(d2, c("Date", "POSIXt")))

  d3 <- cdm$observation_period %>%
    dplyr::mutate(date_var = !!datediff("observation_period_start_date",
                                        "observation_period_end_date"
    )) %>%
    dplyr::pull("date_var")
  expect_true(is.integer(d3) || is.numeric(d3))

  d4 <- cdm$observation_period %>%
    dplyr::mutate(year = !!datepart("observation_period_start_date", "year"),
                  month = !!datepart("observation_period_start_date", "month"),
                  day = !!datepart("observation_period_start_date", "day")) %>%
    dplyr::collect()
  expect_true(is.integer(d4$year) || is.numeric(d3))
  expect_true(is.integer(d4$month) || is.numeric(d3))
  expect_true(is.integer(d4$day) || is.numeric(d3))

}

check_row_number <-  function(cdm){

  expect_no_error(cdm$person %>%
                    dplyr::filter(row_number() == 1))

  expect_no_error(cdm$person %>%
    dplyr::group_by(person_id) %>%
      dplyr::filter(row_number() == 1))

  expect_no_error(cdm$observation_period %>%
                  dbplyr::window_order(observation_period_start_date) %>%
                  dplyr::filter(row_number() == 1))

  expect_no_error(cdm$observation_period %>%
                    dplyr::group_by(person_id) %>%
                    dbplyr::window_order(observation_period_start_date) %>%
                    dplyr::filter(row_number() == 1))

}

checks <- list()

for (dbtype in dbToTest) {
  if (dbtype == "bigquery") next
  test_that(glue::glue("{dbtype} - checking common usage"), {
    if (dbtype != "duckdb") skip_on_cran() else skip_if_not_installed("duckdb")
    con <- get_connection(dbtype)
    cdm_schema <- get_cdm_schema(dbtype)
    write_prefix <- paste0("test", as.integer(Sys.time()), "_")
    write_schema <- get_write_schema(dbtype, prefix = write_prefix)
    skip_if(any(write_schema == "") || any(cdm_schema == "") || is.null(con))
    skip_if_not_installed("duckdb") # prepare_cdm require duckdb
    cdm <- prepare_cdm(con, write_schema)
    check_counts(cdm)
    check_mutate(cdm)
    check_summarise_dplyr(cdm)
    check_summarise_quantiles(cdm)
    check_joins(cdm)
    check_dates(cdm)
    check_row_number(cdm)

    checks[[dbtype]] <<- dplyr::tibble(dbtype = dbtype,
                                      "dplyr::count" = 1,
                                      "dplyr::tally" = 1,
                                      "dplyr::mutate(as.integer(x))" = 1,
                                      "dplyr::mutate(as.Date(x))" = 1,
                                      # "CDMConnector::asDate" = 0,
                                      "dplyr::if_else" = 1,
                                      "dplyr::summaise(min(x))" = 1,
                                      "dplyr::summaise(mean(x))" = 1,
                                      "dplyr::summaise(max(x))" = 1,
                                      "CDMConnector::summariseQuantile" = 1,
                                      "dplyr::left_join" = 1,
                                      "dplyr::right_join" = 1,
                                      "dplyr::inner_join" = 1,
                                      "dplyr::anti_join" = 1,
                                      "CDMConnector::dateadd" = 0,
                                      "CDMConnector::datediff" = 0,
                                      "CDMConnector::datepart" = 0,
                                      "dplyr::row_number"  = 1)


    rm(cdm)

    # clean up
    cdm <- cdmFromCon(con,
                       cdmName = "test",
                       cdmSchema = cdm_schema,
                       writeSchema = write_schema)

    expect_s3_class(cdm, "cdm_reference")
    dropSourceTable(cdm, dplyr::contains(write_prefix))
    disconnect(con)
  })
}
# checks <- dplyr::bind_rows(checks)
# plot_levels <- rev(colnames(checks))

# plot <- checks %>%
#   tidyr::pivot_longer(
#     cols = !dbtype,
#     names_to = "check",
#     values_to = "value",
#     cols_vary = 'slowest',
#   ) %>%
#   dplyr::mutate(check=factor(check,
#                           levels = plot_levels)) %>%
#   ggplot2::ggplot(ggplot2::aes(x = dbtype,
#            y = check,
#            fill = as.character(value))) +
#   ggplot2::geom_tile(colour="grey", alpha=0.8, width=1) +
#   ggplot2::xlab(label = "Sample")+
#   ggplot2::theme_bw() +
#   ggplot2::theme(legend.position="none") +
#   ggplot2::scale_x_discrete(expand=c(0,0))+
#   ggplot2::scale_y_discrete(expand=c(0,0)) +
#   ggplot2::scale_fill_manual(values = c("red", "green"))+
#   ggplot2::xlab("DBMS")+
#   ggplot2::ylab("")+
#   ggplot2::ggtitle("Analytic functionality checked in continuous integration")

# ggplot2::ggsave("dbms_coverage/tests.png", plot)

Try the CDMConnector package in your browser

Any scripts or data that you put into this service are public.

CDMConnector documentation built on April 4, 2025, 4:42 a.m.