tests/testthat/dbplyrTestFunction.R

library(dplyr)
library(testthat)
# options("DEBUG_DATABASECONNECTOR_DBPLYR" = TRUE)

testDbplyrFunctions <- function(connectionDetails, cdmDatabaseSchema) {
  assertTempEmulationSchemaSet(connectionDetails$dbms)
  
  connection <- connect(connectionDetails)
  on.exit(dropEmulatedTempTables(connection))
  on.exit(disconnect(connection), add = TRUE)
  
  person <- tbl(connection, inDatabaseSchema(cdmDatabaseSchema, "person"))
  observationPeriod <- tbl(connection, inDatabaseSchema(cdmDatabaseSchema, "observation_period"))
  
  # Test filter, arrange, relocate, distinct -----------------------------------
  nMales <- person %>%
    filter(gender_concept_id == 8507) %>%
    count() %>%
    pull()
  expect_gt(nMales, 1)
  
  nMales2 <- person %>%
    filter(gender_concept_id %in% c(8507)) %>%
    count() %>%
    pull()
  expect_gt(nMales2, 1)
  
  longestObsPeriod <- observationPeriod %>%
    mutate(duration = dateDiff("day", observation_period_start_date, observation_period_end_date)) %>%
    arrange(desc(duration)) %>%
    # relocate(duration) %>% # relocate of field containing custom function no longer works in dbplr 2.3.1.
    head(1) %>%
    collect()
  expect_gt(longestObsPeriod$duration, 1)
  
  topAges <- person %>%
    inner_join(observationPeriod, by = "person_id") %>%
    mutate(age = year(observation_period_start_date) - year_of_birth) %>%
    distinct(age) %>%
    rename(person_age = age) %>%
    arrange(desc(person_age)) %>%
    head(10) %>%
    collect()
  expect_gt(nrow(topAges), 1)
  
  # Test copy_inline -----------------------------------------------------------
  rows <- dbplyr::copy_inline(connection, mtcars) %>% 
    filter(hp > 200) %>%
    arrange(wt, mpg) %>%
    collect()
  rows2 <- mtcars %>% 
    filter(hp > 200) %>%
    arrange(wt, mpg) %>%
    collect()
  expect_equivalent(rows, rows2, tolerance = 1e-6)

  # Test slicing ---------------------------------------------------------------
  personSample <- person %>%
    slice_sample(n = 10) %>%
    relocate(care_site_id) %>%
    collect()
  expect_equal(nrow(personSample), 10)
  expect_equal(which(names(personSample) == "care_site_id"), 1)
  
  # Test ifelse ----------------------------------------------------------------
  sexString <- person %>%
    mutate(sex = ifelse(.data$gender_concept_id == 8507, "Male",
                        ifelse(.data$gender_concept_id == 8532, "Female", NA)
    )) %>%
    select("person_id", "sex") %>%
    head() %>%
    collect()
  expect_true(all(sexString$sex %in% c("Male", "Female")))
  
  # Test creation of temp tables -----------------------------------------------
  cars2 <- copy_to(connection, cars, overwrite = TRUE)
  cars2 <- cars2 %>% collect()
  expect_equivalent(arrange(cars, speed, dist), arrange(cars2, speed, dist))
  
  tempTable <- person %>%
    filter(gender_concept_id == 8507) %>%
    compute()
  nMales2 <- tempTable %>%
    count() %>%
    pull()
  expect_gt(nMales2, 1)
  
  dataWithNa <- cars
  dataWithNa$speed[2] <- NA
  dataWithNa <- copy_to(connection, dataWithNa, overwrite = TRUE)
  filteredRow <- dataWithNa %>%
    filter(is.na(speed)) %>%
    collect()
  expect_equal(nrow(filteredRow), 1)
  
  aPersonId <- person %>%
    head(1) %>%
    pull(person_id)
  localTable = tibble(person_id = aPersonId, person_name = "Pedro")
  remoteTable <- copy_to(connection, localTable, overwrite = TRUE)
  result <- remoteTable %>%
    left_join(person, by = join_by(person_id)) %>%
    collect()
  expect_equal(result$person_name, "Pedro")
  
  # Test joins and unions ------------------------------------------------------
  
  # Casting duration to numeric because platforms like SQL Server compute the mean by first computing the sum, which
  # will not fit in an integer:
  durationDist <- person %>%
    inner_join(observationPeriod, by = "person_id") %>%
    mutate(duration = as.numeric(dateDiff("day", observation_period_start_date, observation_period_end_date))) %>%
    group_by(gender_concept_id) %>%
    summarize(mean_duration = mean(duration, na.rm = TRUE),
              min_duration = min(duration, na.rm = TRUE),
              max_duration = max(duration, na.rm = TRUE),
              count_duration = n()) %>%
    collect()
  
  expect_equal(nrow(durationDist), 2)
  
  resultOfAntiJoin <- observationPeriod %>% 
    anti_join(
      person %>%
        filter(!is.null(race_concept_id)),
      by = "person_id"
    ) %>%
    group_by(period_type_concept_id) %>%
    summarize(value_count = n()) %>%
    collect()
  expect_s3_class(resultOfAntiJoin, "data.frame")
  
  personTwice <- person %>%
    union_all(person) %>%
    count() %>%
    collect()
  expect_gt(personTwice$n, 1)
  
  tripleJoin <- person %>%
    left_join(observationPeriod, by = join_by(person_id)) %>%
    left_join(observationPeriod %>%
                select(person_id, dummy = observation_period_start_date),
                by = join_by(person_id)) %>%
    collect()
  expect_gt(nrow(tripleJoin), 0)
  
  # Test row_number ------------------------------------------------------------
  top10PersonsHardWay <- person %>%
    mutate(rn = row_number(person_id)) %>%
    filter(rn <= 10) %>%
    collect()
  expect_equal(nrow(top10PersonsHardWay), 10)
  
  # Test date functions --------------------------------------------------------
  nObsOverOneYear <- observationPeriod %>%
    filter(dateDiff("day", observation_period_start_date, observation_period_end_date) > 365) %>%
    count() %>%
    pull()
  
  expect_gt(nObsOverOneYear, 1)
  
  testData <- observationPeriod %>%
    mutate(plus_one_date = dateAdd("day", 1, observation_period_start_date),
           end_of_month_date = eoMonth(observation_period_start_date),
           obs_year = year(observation_period_start_date),
           obs_month = month(observation_period_start_date),
           obs_day = day(observation_period_start_date)) %>%
    mutate(is_later = if_else(plus_one_date > observation_period_start_date, 1, 0)) %>%
    head(1) %>%
    collect()
  expect_equal(as.Date(testData$plus_one_date), dateAdd("day", 1, testData$observation_period_start_date))
  expect_equal(testData$end_of_month_date, eoMonth(testData$observation_period_start_date))
  expect_equal(testData$obs_year, year(testData$observation_period_start_date))
  expect_equal(testData$obs_month, month(testData$observation_period_start_date))
  expect_equal(testData$obs_day, day(testData$observation_period_start_date))
  expect_equal(testData$is_later, 1)

  # dumbNameCars <- cars
  # names(dumbNameCars) <- c("Car speed", "Dist. to Stop")
  # copy_to(connection, dumbNameCars, name = "dn_cars")
  
  # disconnect(connection)
  invisible(NULL)
}

Try the DatabaseConnector package in your browser

Any scripts or data that you put into this service are public.

DatabaseConnector documentation built on May 29, 2024, 6:14 a.m.