tests/testthat/test-import.R

# Tests for functions.R

test_that("import", {
  # Path to test files
  path <- system.file("testdata", package = "mpathsenser")

  # Create db
  filename <- tempfile("test", fileext = ".db")
  db <- create_db(NULL, filename)

  # Import the data
  expect_message(import(
    path = path,
    db = db,
    recursive = FALSE
  ), "All files were successfully written to the database.")

  # Test whether no new files need to be processed
  expect_message(import(
    path = path,
    db = db,
    recursive = FALSE
  ), "No new files to process.")

  # Test non-existing path
  expect_error(
    import(path = tempfile(), db = db),
    "Directory .+ does not exist."
  )
  temp <- tempfile()
  dir.create(temp)
  expect_error(
    import(db = db, path = temp),
    "Can't find JSON files in .+\\."
  )
  unlink(temp, recursive = TRUE)
  dbDisconnect(db)
  file.remove(filename)

  # Test deprecated arguments
  filename <- tempfile("test2", fileext = ".db")
  db2 <- create_db(NULL, filename)
  warnings_log <- capture_warnings(import(
    path = path,
    db = db2,
    recursive = TRUE # Includes broken files
  ))
  expect_match(warnings_log, "Invalid JSON format in file broken\\/broken\\d\\.json", all = FALSE)
  expect_match(warnings_log, "Some files could not be written to the database.", all = FALSE)
  dbDisconnect(db2)
  file.remove(filename)
})

test_that(".import_read_json", {
  path <- system.file("testdata", package = "mpathsenser")

  expect_type(
    .import_read_json(path, "test.json"),
    "list"
  )
  expect_type(
    .import_read_json(NULL, file.path(path, "test.json")),
    "list"
  )

  expect_warning(
    .import_read_json(path, "foo"),
    "foo does not exist."
  )
  expect_equal(
    suppressWarnings(.import_read_json(path, "foo")),
    NA
  )

  # Empty file
  expect_equal(
    suppressWarnings(.import_read_json(path, "empty.json")),
    NA
  )
  tempfile <- tempfile(fileext = ".json")
  file.create(tempfile)
  expect_equal(.import_read_json(NULL, tempfile), NA)
  unlink(tempfile)

  path <- system.file("testdata", "broken", package = "mpathsenser")
  expect_warning(
    .import_read_json(path, "broken1.json"),
    "Invalid JSON format in file broken1.json"
  )
  expect_equal(
    suppressWarnings(.import_read_json(path, "broken1.json")),
    NULL
  )
})

test_that("safe_extract", {
  data <- list(list(list(a = "a", b = "b", c = NULL, d = NA)))
  expect_equal(safe_extract(data, "a"), "a")
  expect_equal(safe_extract(data, "b"), "b")
  expect_equal(safe_extract(data, "c"), NA)
  expect_equal(safe_extract(data, "d"), NA)
  expect_equal(safe_extract(data, "e"), NA)

  data <- list(
    list(
      list(a = "a", b = "b", c = "c", d = NA, e = NULL, f = NULL)
    ),
    list(
      list(a = "a", b = NA, c = NULL, d = NA, e = NA, f = NULL)
    )
  )
  expect_equal(safe_extract(data, "a"), c("a", "a"))
  expect_equal(safe_extract(data, "b"), c("b", NA))
  expect_equal(safe_extract(data, "c"), c("c", NA))
  expect_equal(safe_extract(data, "d"), c(NA, NA))
  expect_equal(safe_extract(data, "e"), c(NA, NA))
  expect_equal(safe_extract(data, "f"), c(NA, NA))
  expect_equal(safe_extract(data, "g"), c(NA, NA))
})

test_that(".import_clean", {
  data <- list(
    list(
      header = list(
        study_id = "test-study",
        device_role_name = "masterphone",
        trigger_id = "1",
        user_id = "12345",
        start_time = "2021-11-14T14:01:00.000000Z",
        time_zone_name = "CET",
        data_format = list(
          namespace = "dk.cachet.carp",
          name = "accelerometer"
        )
      ),
      body = list()
    ),
    list(
      header = list(
        study_id = "test-study",
        device_role_name = "masterphone",
        trigger_id = "1",
        user_id = "12345",
        start_time = "2021-11-14T14:01:00.000000Z",
        time_zone_name = "CET",
        data_format = list(
          namespace = "dk.cachet.carp",
          name = "accelerometer"
        )
      ),
      body = list()
    )
  )

  expect_no_error(.import_clean(data, "accelerometer"))
  expect_equal(nrow(.import_clean(data, "accelerometer")), 2)

  # Set the first instance of study_id to NULL
  data[[1]][[1]]$study_id <- NULL
  expect_no_error(.import_clean(data, "accelerometer"))
  expect_equal(nrow(.import_clean(data, "accelerometer")), 2)
  # Interesting bug when using unlist in safe_extract: NULLs are implicitly dropped, so if only one
  # value is left, it is recycled in the rest of the data frame. Hence doing this test in two steps.
  expect_equal(.import_clean(data, "accelerometer")$study_id, c(NA, "test-study"))
  data[[2]][[1]]$study_id <- NULL
  expect_no_error(.import_clean(data, "accelerometer"))
  expect_equal(nrow(.import_clean(data, "accelerometer")), 2)
  expect_equal(.import_clean(data, "accelerometer")$study_id, c(NA, NA))

  data[[1]][[1]]$user_id <- NULL
  expect_no_error(.import_clean(data, "accelerometer"))
  expect_equal(nrow(.import_clean(data, "accelerometer")), 1)
  data[[2]][[1]]$user_id <- NULL
  expect_error(.import_clean(data, "accelerometer"), NA)
  expect_equal(nrow(.import_clean(data, "accelerometer")), 0)
})

test_that(".import_clean_new", {
  data <- list(
    list(
      sensorStartTime = 1.705944e+15,
      data = list(
        `__type` = "dk.cachet.carp.wifi",
        ip = "192.168.1"
      )
    ),
    list(
      sensorStartTime = 1.705945e+15,
      sensorEndTime = 1.705945e+15,
      data = list(
        `__type` = "dk.cachet.carp.ambientLight",
        meanLux = 123
      )
    )
  )

  file_name <- "123_study_456_m_Path_sense_2021-11-14T14:01:00.000000.json"

  true <- tibble(
    study_id = "study",
    participant_id = "456",
    data_format = "cams 1.0.0",
    start_time = as.character(
      as.POSIXct(c(1.705944e+15, 1.705945e+15) / 1e6, tz = "UTC", origin = "1970-01-01")
    ),
    end_time = as.character(
      as.POSIXct(c(NA, 1.705945e+15) / 1e6, tz = "UTC", origin = "1970-01-01")
    ),
    sensor = c("wifi", "ambientLight"),
    data = list(
      list(
        ip = "192.168.1"
      ),
      list(
        meanLux = 123
      )
    )
  )

  expect_equal(.import_clean_new(data, file_name), true)

  # If the file name is incorrect, NA is returned for the study and participant_id
  res <- .import_clean_new(data, "foo")
  expect_true(all(is.na(select(res, "study_id", "participant_id"))))
})

test_that(".import_map_sensor_names", {
  expect_equal(
    .import_map_sensor_names("accelerationfeatures"),
    "Accelerometer"
  )

  # Non-existing sensor names are unchanged
  expect_equal(
    .import_map_sensor_names("Foo"),
    "Foo"
  )
})

test_that(".import_is_duplicate", {
  db <- create_db(NULL, tempfile())

  data <- data.frame(
    study_id = "test_study",
    data_format = "carp",
    participant_id = c("12345", "12345", "23456", "23456"),
    file_name = c("12345/test1.json", "12345/test2.json", "23456/test1.json", "23456/test2.json")
  )
  add_study(db, study_id = data$study_id, data_format = data$data_format)
  add_participant(db, participant_id = data$participant_id, study_id = data$study_id)
  add_processed_files(db,
    file_name = data$file_name,
    study_id = data$study_id,
    participant_id = data$participant_id
  )

  expect_equal(.import_is_duplicate(db@dbname, data), rep(TRUE, 4))

  data2 <- data.frame(
    study_id = c("test_study", "test_study", "foo-study", "foo-study"),
    data_format = c("carp", "carp", "bar", "bar"),
    participant_id = c("12345", "23456", "34567", "34567"),
    file_name = c("12345/test3.json", "23456/test3.json", "34567/test1.json", "34567/test2.json")
  )
  data2 <- rbind(data[c(1, 2), ], data2)

  expect_equal(
    .import_is_duplicate(db@dbname, data2),
    c(TRUE, TRUE, FALSE, FALSE, FALSE, FALSE)
  )

  expect_equal(.import_is_duplicate(db@dbname, data.frame()), NA)
  expect_equal(.import_is_duplicate(db@dbname, list()), NA)
  expect_equal(.import_is_duplicate(db@dbname, NULL), NA)

  # Clean up
  dbDisconnect(db)
  unlink(db@dbname)
})

test_that(".import_extract_sensor_data", {
  data <- tibble::tibble(
    body = list(
      list(
        body = list(
          id = "5d0ac8d0-777c-11eb-bf47-ed3b61db1e5e",
          timestamp = "2021-02-25T15:15:58.557282Z",
          data = list(
            list(
              timestamp = "2021-02-25T15:15:58.557282Z",
              xm = NA,
              ym = NA,
              zm = NA,
              xms = NA,
              yms = NA,
              zms = NA,
              n = NA
            ),
            list(
              timestamp = "2021-02-25T15:15:58.557282Z",
              xm = NA,
              ym = NA,
              zm = NA,
              xms = NA,
              yms = NA,
              zms = NA,
              n = NA
            )
          )
        )
      ),
      list(
        body = list(
          id = "5d0ac8d0-777c-11eb-bf47-ed3b61db1e5f",
          timestamp = "2021-02-25T15:15:58.557282Z",
          data = list(
            list(
              timestamp = "2021-02-25T15:15:58.557282Z",
              xm = NA,
              ym = NA,
              zm = NA,
              xms = NA,
              yms = NA,
              zms = NA,
              n = 10
            ),
            list(
              timestamp = "2021-02-25T15:15:58.557282Z",
              xm = NA,
              ym = NA,
              zm = NA,
              xms = NA,
              yms = NA,
              zms = NA,
              n = NA
            )
          )
        )
      )
    ),
    study_id = "test-study",
    participant_id = "12345",
    start_time = "2021-02-25T15:15:58.557282Z",
    data_format = "carp",
    sensor = "accelerometer"
  )

  expect_equal(
    .import_extract_sensor_data(data)$Accelerometer$measurement_id,
    c(
      "5d0ac8d0-777c-11eb-bf47-ed3b61db1e5e_1",
      "5d0ac8d0-777c-11eb-bf47-ed3b61db1e5e_2",
      "5d0ac8d0-777c-11eb-bf47-ed3b61db1e5f_1",
      "5d0ac8d0-777c-11eb-bf47-ed3b61db1e5f_2"
    )
  )

  # Test sensor function that provides a warning
  data$sensor <- c("Accelerometer", "Keyboard")
  expect_warning(
    .import_extract_sensor_data(data),
    "Function for implementing keyboard data currently not implemented."
  )

  expect_equal(
    suppressWarnings(.import_extract_sensor_data(data)),
    list(
      Accelerometer = data.frame(
        measurement_id = c(
          "5d0ac8d0-777c-11eb-bf47-ed3b61db1e5e_1",
          "5d0ac8d0-777c-11eb-bf47-ed3b61db1e5e_2"
        ),
        participant_id = "12345",
        date = "2021-02-25",
        time = "15:15:58.557",
        n = NA,
        x_mean = NA,
        y_mean = NA,
        z_mean = NA,
        x_energy = NA,
        y_energy = NA,
        z_energy = NA
      ),
      Keyboard = NULL
    )
  )

  # Test function that provides an error
  data$sensor <- "Accelerometer"
  data2 <- data
  data2$body <- list(list(foo = "bar"), list(foo = "bar"))
  expect_equal(
    .import_extract_sensor_data(data2),
    NA
  )

  data$sensor[1] <- "unknown"
  expect_equal(
    .import_extract_sensor_data(data)$Accelerometer$measurement_id,
    c(
      "5d0ac8d0-777c-11eb-bf47-ed3b61db1e5f_1",
      "5d0ac8d0-777c-11eb-bf47-ed3b61db1e5f_2"
    )
  )

  data$sensor[1] <- "Foo"
  expect_warning(
    .import_extract_sensor_data(data),
    "Sensor 'Foo' is not supported by this package."
  )
  expect_equal(
    names(suppressWarnings(.import_extract_sensor_data(data))),
    "Accelerometer"
  )

  data$sensor[1] <- "Gyroscope"
  expect_equal(
    names(.import_extract_sensor_data(data, sensors = "Accelerometer")),
    "Accelerometer"
  )
  expect_equal(
    names(.import_extract_sensor_data(data, sensors = "Gyroscope")),
    "Gyroscope"
  )

  data$sensor <- c(NA, NA)
  expect_equal(
    .import_extract_sensor_data(data),
    structure(list(), names = character(0))
  )

  data$sensor <- "Accelerometer"
  data$body <- list(list(), list())
  expect_equal(
    .import_extract_sensor_data(data),
    NA
  )
})

test_that(".import_write_to_db", {
  db <- create_db(NULL, tempfile())

  data <- list(
    Pedometer = tibble::tibble(
      measurement_id = "5d0ac8d0-777c-11eb-bf47-ed3b61db1e5e_1",
      participant_id = "12345",
      date = "2021-02-25",
      time = "15:15:58.557",
      step_count = 1
    )
  )
  meta_data <- data.frame(
    participant_id = "12345",
    study_id = "test-study",
    data_format = "carp",
    file_name = "12345/test1.json"
  )

  expect_equal(.import_write_to_db(db, meta_data, data), 1)
  expect_equal(.import_write_to_db(db, meta_data, data), 0)

  # Test that transactions are rolled back if an error occurs
  data$Pedometer <- rbind(data$Pedometer, data$Pedometer)
  data$Pedometer$measurement_id[[1]] <- "5d0ac8d0-777c-11eb-bf47-ed3b61db1e5e_2"
  data$Pedometer$measurement_id[[2]] <- NA
  expect_error(
    .import_write_to_db(db, meta_data, data),
    "NOT NULL constraint failed: Pedometer.measurement_id"
  )
  expect_false(
    "5d0ac8d0-777c-11eb-bf47-ed3b61db1e5e_2" %in%
      DBI::dbGetQuery(db, "SELECT measurement_id FROM Accelerometer")[[1]]
  )
  expect_equal(nrow(DBI::dbGetQuery(db, "SELECT * FROM  Pedometer")), 1)


  # Clean up
  dbDisconnect(db)
  unlink(db@dbname)
})

test_that("save2db", {
  # Create db
  filename <- tempfile("foo", fileext = ".db")
  db <- create_db(NULL, filename)

  dbExecute(db, "INSERT INTO Study VALUES('12345', 'mpathsenser')")
  dbExecute(db, "INSERT INTO Participant VALUES('12345', '12345')")
  db_size <- file.size(filename)

  # Define the data
  data <- data.frame(
    measurement_id = paste0("12345_", 1:1000),
    participant_id = "12345",
    date = "2021-11-14",
    time = "16:40:01.123",
    step_count = 1
  )

  # Write to db
  expect_error(
    DBI::dbWithTransaction(db, save2db(db, "Pedometer", data)),
    NA
  )

  # Check if the file size increased
  db_size2 <- file.size(filename)
  expect_gt(db_size2, db_size)

  # Check the data output
  expect_equal(
    DBI::dbGetQuery(db, "SELECT * FROM Pedometer"),
    data
  )

  # Entry with the same ID should simply be skipped and give no error
  expect_error(
    DBI::dbWithTransaction(db, save2db(db = db, name = "Pedometer", data = data)),
    NA
  )
  DBI::dbExecute(db, "VACUUM") # A vacuum to clear the tiny increase by replacement :)
  db_size3 <- file.size(filename)
  expect_equal(db_size2, db_size3)
  expect_equal(DBI::dbGetQuery(db, "SELECT COUNT(*) FROM Pedometer")[[1]], 1000L)
  expect_equal(
    DBI::dbGetQuery(db, "SELECT * FROM Pedometer"),
    data
  )

  # Now try with part of the data being replicated
  data <- rbind(data, data.frame(
    measurement_id = paste0("12345_", 500:1500),
    participant_id = "12345",
    date = "2021-11-14",
    time = "16:40:01.123",
    step_count = 1
  ))

  expect_error(
    DBI::dbWithTransaction(
      db,
      save2db(
        db = db,
        name = "Pedometer",
        data = data.frame(
          measurement_id = paste0("12345_", 500:1500),
          participant_id = "12345",
          date = "2021-11-14",
          time = "16:40:01.123",
          step_count = 1
        )
      )
    ),
    NA
  )
  db_size4 <- file.size(filename)
  expect_gt(db_size4, db_size3)
  expect_equal(DBI::dbGetQuery(db, "SELECT COUNT(*) FROM Pedometer")[[1]], 1500L)
  expect_equal(
    DBI::dbGetQuery(db, "SELECT * FROM Pedometer"),
    distinct(data)
  )

  # Cleanup
  dbDisconnect(db)
  file.remove(filename)
})

Try the mpathsenser package in your browser

Any scripts or data that you put into this service are public.

mpathsenser documentation built on May 29, 2024, 9:11 a.m.