Nothing
## link ===============
test_that("link", {
dat1 <- data.frame(
time = rep(seq.POSIXt(as.POSIXct("2021-11-14 13:00:00"), by = "1 hour", length.out = 3), 2),
participant_id = c(rep("12345", 3), rep("23456", 3)),
item_one = rep(c(40, 50, 60), 2)
)
dat2 <- data.frame(
time = rep(seq.POSIXt(as.POSIXct("2021-11-14 12:50:00"), by = "5 min", length.out = 30), 2),
participant_id = c(rep("12345", 30), rep("23456", 30)),
x = rep(1:30, 2)
)
res <- link(
x = dat1,
y = dat2,
by = "participant_id",
time = "time",
y_time = "time",
offset_before = 1800,
split = NULL
)
true <- tibble::tibble(
time = rep(c(
as.POSIXct("2021-11-14 13:00:00"), as.POSIXct("2021-11-14 14:00:00"),
as.POSIXct("2021-11-14 15:00:00")
), 2),
participant_id = c(rep("12345", 3), rep("23456", 3)),
item_one = rep(c(40, 50, 60), 2),
data = rep(list(
tibble::tibble(
time = seq.POSIXt(
from = as.POSIXct("2021-11-14 12:50:00"),
length.out = 3, by = "5 min"
),
x = 1:3
),
tibble::tibble(
time = seq.POSIXt(
from = as.POSIXct("2021-11-14 13:30:00"),
length.out = 7, by = "5 min"
),
x = 9:15
),
tibble::tibble(
time = seq.POSIXt(
from = as.POSIXct("2021-11-14 14:30:00"),
length.out = 7, by = "5 min"
),
x = 21:27
)
), 2)
)
expect_equal(res, true)
# Test warning if time and y_time are not specified
lifecycle::expect_deprecated(
link(
x = dat1,
y = dat2,
by = "participant_id",
y_time = "time",
offset_before = 1800,
split = NULL
),
"The `time` argument of `link\\(\\)` must not be missing as of mpathsenser 1.1.2."
)
lifecycle::expect_deprecated(
link(
x = dat1,
y = dat2,
by = "participant_id",
time = "time",
offset_before = 1800,
split = NULL
),
"The `y_time` argument of `link\\(\\)` must not be missing as of mpathsenser 1.1.2."
)
# Test x and y identical
expect_error(
link(
x = dat1,
y = dat1,
by = "participant_id",
time = "time",
y_time = "time",
offset_before = 1800,
split = NULL
),
"`x` and `y` are identical."
)
# Test without offset bu using time and end_time
res2 <- dat1 %>%
dplyr::rename(end_time = time) %>%
mutate(start_time = end_time - 1800) %>%
link(
x = .,
y = dat2,
by = "participant_id",
time = start_time,
end_time = end_time,
y_time = time
)
true2 <- true
true2$start_time <- true2$time - 1800
true2 <- select(true2, end_time = time, participant_id, item_one, start_time, data)
expect_equal(res2, true2)
# Test that end_time and offset_before and offset_after cannot be used at the same time.
expect_error(
dat1 %>%
dplyr::rename(end_time = time) %>%
mutate(start_time = end_time - 1800) %>%
link(
x = .,
y = dat2,
by = "participant_id",
time = start_time,
end_time = end_time,
y_time = time,
offset_before = 1800
),
"`end_time` and `offset_before` or `offset_after` cannot be used at the same time."
)
# Test split argument
res <- link(
x = dat1,
y = dat2,
by = "participant_id",
time = time,
y_time = time,
offset_before = 1800,
split = 6
)
expect_equal(res, true)
# Scrambled test
scramble <- function(data) {
idx <- sample(seq_along(data[, 1]), nrow(data))
data[idx, ]
}
res <- link(
x = scramble(dat1),
y = scramble(dat2),
by = "participant_id",
time = time,
y_time = time,
offset_before = 1800
) %>%
arrange(participant_id, time)
expect_equal(res, true)
res <- link(
x = dat1,
y = dat2,
by = "participant_id",
time = time,
y_time = time,
offset_after = 1800
)
true <- tibble::tibble(
time = rep(c(
as.POSIXct("2021-11-14 13:00:00"), as.POSIXct("2021-11-14 14:00:00"),
as.POSIXct("2021-11-14 15:00:00")
), 2),
participant_id = c(rep("12345", 3), rep("23456", 3)),
item_one = rep(c(40, 50, 60), 2),
data = rep(list(
tibble::tibble(
time = seq.POSIXt(
from = as.POSIXct("2021-11-14 13:00:00"),
length.out = 7, by = "5 min"
),
x = 3:9
),
tibble::tibble(
time = seq.POSIXt(
from = as.POSIXct("2021-11-14 14:00:00"),
length.out = 7, by = "5 min"
),
x = 15:21
),
tibble::tibble(
time = seq.POSIXt(
from = as.POSIXct("2021-11-14 15:00:00"),
length.out = 4, by = "5 min"
),
x = 27:30
)
), 2)
)
expect_equal(res, true)
res <- link(
x = scramble(dat1),
y = scramble(dat2),
by = "participant_id",
time = time,
y_time = time,
offset_after = 1800
) %>%
arrange(participant_id, time)
expect_equal(res, true)
# Test add_before and add_after
# Add one minute to dat2 time as otherwise a row would not added before
# This is due to new functionality where a row is not added if the first measurements in an
# interval is equal to the start time
dat2$time <- dat2$time + lubridate::minutes(1)
res <- link(
x = dat1,
y = dat2,
by = "participant_id",
time = time,
y_time = time,
offset_before = 1800,
add_before = TRUE,
add_after = TRUE
)
true <- tibble::tibble(
time = rep(c(
as.POSIXct("2021-11-14 13:00:00"), as.POSIXct("2021-11-14 14:00:00"),
as.POSIXct("2021-11-14 15:00:00")
), 2),
participant_id = c(rep("12345", 3), rep("23456", 3)),
item_one = rep(c(40, 50, 60), 2),
data = rep(list(
tibble::tibble(
time = c(
seq.POSIXt(
from = as.POSIXct("2021-11-14 12:51:00"),
length.out = 2, by = "5 min"
),
as.POSIXct("2021-11-14 13:00:00")
),
x = c(1:3),
original_time = c(
rep(as.POSIXct(NA), 2),
as.POSIXct("2021-11-14 13:01:00")
)
),
tibble::tibble(
time = c(
as.POSIXct("2021-11-14 13:30:00"),
seq.POSIXt(
from = as.POSIXct("2021-11-14 13:31:00"),
length.out = 6, by = "5 min"
),
as.POSIXct("2021-11-14 14:00:00")
),
x = c(8, 9:14, 15),
original_time = c(
as.POSIXct("2021-11-14 13:26:00"),
rep(as.POSIXct(NA), 6),
as.POSIXct("2021-11-14 14:01:00")
)
),
tibble::tibble(
time = c(
as.POSIXct("2021-11-14 14:30:00"),
seq.POSIXt(
from = as.POSIXct("2021-11-14 14:31:00"),
length.out = 6, by = "5 min"
),
as.POSIXct("2021-11-14 15:00:00")
),
x = c(20, 21:26, 27),
original_time = c(
as.POSIXct("2021-11-14 14:26:00"),
rep(lubridate::`NA_POSIXct_`, 6),
as.POSIXct("2021-11-14 15:01:00")
)
)
), 2)
)
expect_equal(res, true, ignore_attr = TRUE)
# Without offset, using time, end_time, and y_time
res2 <- dat1 %>%
dplyr::rename(end_time = time) %>%
mutate(start_time = end_time - 1800) %>%
link(
x = .,
y = dat2,
by = "participant_id",
time = start_time,
end_time = end_time,
y_time = time,
add_before = TRUE,
add_after = TRUE
)
true$start_time <- true$time - 1800
true <- select(true, end_time = time, participant_id, item_one, start_time, data)
expect_equal(res2, true)
# Bug #6: Test whether original_time is present in all nested data columns
# Create some data to use
dat1 <- data.frame(
time = c(
rep(seq.POSIXt(as.POSIXct("2021-11-14 13:00:00"), by = "1 hour", length.out = 3), 2),
as.POSIXct("2021-11-14 13:00:00"), as.POSIXct("2021-11-14 13:00:00")
),
participant_id = c(rep("12345", 3), rep("23456", 3), "45678", "56789"),
item_one = c(rep(c(40, 50, 60), 2), 40, 40)
)
dat2 <- data.frame(
time = c(
rep(seq.POSIXt(as.POSIXct("2021-11-14 12:50:00"), by = "5 min", length.out = 30), 2),
seq.POSIXt(as.POSIXct("2021-11-14 12:30:00"), by = "5 min", length.out = 6)
),
participant_id = c(rep("12345", 30), rep("23456", 30), rep("45678", 6)),
x = c(rep(1:30, 2), 1:6)
)
# Link together, make sure to include rows before and after
res <- link(
x = dat1,
y = dat2,
by = "participant_id",
time = time,
y_time = time,
offset_before = 1800,
add_before = TRUE,
add_after = TRUE
)
# Use the results, e.g. to filter out the extra added rows by offset_before and offset_after
# (as you have to do for the number of screen unlocks).
expect_true(all(purrr::map_lgl(res$data, ~ "original_time" %in% colnames(.x))))
# Bug: add_before and add_after were ignored (or rather lost) if data_main is empty
dat1 <- data.frame(
time = rep(seq.POSIXt(as.POSIXct("2021-11-14 13:00:00"), by = "1 hour", length.out = 3), 2),
participant_id = c(rep("12345", 3), rep("23456", 3)),
item_one = rep(c(40, 50, 60), 2)
)
dat2 <- data.frame(
time = rep(c(as.POSIXct("2021-11-14 11:50:00"), as.POSIXct("2021-11-14 16:50:00")), 2),
participant_id = c(rep("12345", 2), rep("23456", 2)),
x = rep(1:2, 2)
)
res <- link(
x = dat1,
y = dat2,
by = "participant_id",
time = time,
y_time = time,
offset_before = 1800L,
add_before = TRUE,
add_after = TRUE
)
expect_true(all(purrr::map_int(res$data, nrow) != 0))
# Check that timezones stay consistent, even with add_before and add_after
dat1 <- data.frame(
time = rep(
seq.POSIXt(as.POSIXct("2021-11-14 13:00:00", tz = "Europe/Brussels"),
by = "1 hour",
length.out = 3
),
2
),
participant_id = c(rep("12345", 3), rep("23456", 3)),
item_one = rep(c(40, 50, 60), 2)
)
dat2 <- data.frame(
time = rep(
seq.POSIXt(as.POSIXct("2021-11-14 11:00:00", tz = "UTC"),
by = "10 mins",
length.out = 40
),
2
),
participant_id = c(rep("12345", 40), rep("23456", 40)),
x = rep(1:2, 2)
)
res <- link(
x = dat1,
y = dat2,
by = "participant_id",
time = time,
y_time = time,
offset_before = 1800L,
add_before = TRUE,
add_after = TRUE
)
expect_equal(attr(res$time, "tz"), "Europe/Brussels")
expect_equal(unique(map_chr(res$data, ~ attr(.x$time, "tz"))), "UTC")
expect_equal(unique(map_chr(res$data, ~ attr(.x$original_time, "tz"))), "UTC")
# Make sure link does not add an extra row if first measurement equal start of the interval or
# vice versa for the end of the interval.
dat1 <- data.frame(
time = as.POSIXct(c("2021-11-14 11:00:00", "2021-11-14 12:00:00")),
participant_id = "12345",
item_one = c(40, 50)
)
dat2 <- data.frame(
time = as.POSIXct(c(
"2021-11-14 10:20:00", "2021-11-14 10:30:00", "2021-11-14 10:40:00",
"2021-11-14 11:00:00", "2021-11-14 11:10:00", "2021-11-14 11:30:01",
"2021-11-14 11:40:00", "2021-11-14 12:10:00"
)),
participant_id = "12345",
x = 1:8
)
true <- tibble::tibble(
time = as.POSIXct(c("2021-11-14 11:00:00", "2021-11-14 12:00:00")),
participant_id = "12345",
item_one = c(40, 50),
data = list(
tibble::tibble(
time = as.POSIXct(c("2021-11-14 10:30:00", "2021-11-14 10:40:00", "2021-11-14 11:00:00")),
x = c(2, 3, 4),
original_time = as.POSIXct(c(NA, NA, NA))
),
tibble::tibble(
time = as.POSIXct(c(
"2021-11-14 11:30:00", "2021-11-14 11:30:01", "2021-11-14 11:40:00",
"2021-11-14 12:00:00"
)),
x = c(5, 6, 7, 8),
original_time = as.POSIXct(c("2021-11-14 11:10:00", NA, NA, "2021-11-14 12:10:00"))
)
)
)
res <- link(
x = dat1,
y = dat2,
by = "participant_id",
time = time,
y_time = time,
offset_before = 1800L,
add_before = TRUE,
add_after = TRUE
)
expect_equal(true, res)
# Check what should be done when any of the times is missing
# Preferably, we just want to create a 0-row tibble with `proto`, as still adding before or after
# is kind of strange. Additionally, the column names should be consistent with the rest of the
# output.
dat1 <- dat1 |>
mutate(end_time = lead(time))
res <- link(
x = dat1,
y = dat2,
by = "participant_id",
time = time,
end_time = end_time,
y_time = time,
add_before = TRUE,
add_after = TRUE
)
true <- tibble::tibble(
time = as.POSIXct(c("2021-11-14 11:00:00", "2021-11-14 12:00:00")),
participant_id = "12345",
item_one = c(40, 50),
end_time = as.POSIXct(c("2021-11-14 12:00:00", NA)),
data = list(
tibble::tibble(
time = as.POSIXct(c(
"2021-11-14 11:00:00", "2021-11-14 11:10:00", "2021-11-14 11:30:00",
"2021-11-14 11:40:00", "2021-11-14 12:00:00"
)),
x = c(4, 5, 6, 7, 8),
original_time = as.POSIXct(c(NA, NA, NA, NA, "2021-11-14 12:10:00"))
),
tibble::tibble(
time = as.POSIXct(double(0)),
x = integer(0),
original_time = as.POSIXct(double(0))
)
)
)
expect_equal(res, true)
})
## link_db ===============
test_that("link_db", {
path <- system.file("testdata", package = "mpathsenser")
db <- open_db(path, "test.db")
dat1 <- data.frame(
time = as.POSIXct(c("2021-11-14 13:00:00", "2021-11-14 14:00:00", "2021-11-14 15:00:00"),
tz = "UTC"
),
participant_id = "12345",
item_one = c(40, 50, 60)
)
# Check deprecation
lifecycle::expect_deprecated(link_db(db, "Activity", "Connectivity", offset_after = 1800L))
# Disable deprecation check
rlang::local_options(lifecycle_verbosity = "quiet")
# Check basic functionality
res <- link_db(db, "Activity", "Connectivity", offset_after = 1800)
true <- tibble::tibble(
measurement_id = c(
"fbf85cd7-6d37-53a8-5c44-ad8fe13ef7ac",
"ef96364c-d1f4-5f73-ce40-277f078e3d0f",
"5ba54e77-4bcf-c8d1-17ff-71b9ed908897"
),
participant_id = "12345",
time = as.POSIXct(c("2021-11-14 13:59:59", "2021-11-14 14:00:00", "2021-11-14 14:00:01"),
tz = "UTC"
),
confidence = c(NA, 100L, 99L),
type = c(NA, "WALKING", "STILL"),
data = list(
tibble::tibble(
measurement_id = c(
"27a5777a-ec41-80de-afa4-d2e7f6b02fcf",
"2d430c2a-5b16-1dce-0e2f-c049c44e3729"
),
time = as.POSIXct(c("2021-11-14 14:00:00", "2021-11-14 14:01:00"), tz = "UTC"),
connectivity_status = c("wifi", NA)
),
tibble::tibble(
measurement_id = c(
"27a5777a-ec41-80de-afa4-d2e7f6b02fcf",
"2d430c2a-5b16-1dce-0e2f-c049c44e3729"
),
time = as.POSIXct(c("2021-11-14 14:00:00", "2021-11-14 14:01:00"), tz = "UTC"),
connectivity_status = c("wifi", NA)
),
tibble::tibble(
measurement_id = "2d430c2a-5b16-1dce-0e2f-c049c44e3729",
time = as.POSIXct("2021-11-14 14:01:00", tz = "UTC"),
connectivity_status = NA_character_
)
)
)
expect_equal(res, true)
# Check reverse
res <- link_db(db, "Activity", "Connectivity", offset_after = 1800, reverse = TRUE)
true <- tibble::tibble(
measurement_id = c(
"27a5777a-ec41-80de-afa4-d2e7f6b02fcf",
"2d430c2a-5b16-1dce-0e2f-c049c44e3729"
),
participant_id = "12345",
time = as.POSIXct(c("2021-11-14 14:00:00", "2021-11-14 14:01:00"), tz = "UTC"),
connectivity_status = c("wifi", NA),
data = list(
tibble::tibble(
measurement_id = c(
"ef96364c-d1f4-5f73-ce40-277f078e3d0f",
"5ba54e77-4bcf-c8d1-17ff-71b9ed908897"
),
time = as.POSIXct(c("2021-11-14 14:00:00", "2021-11-14 14:00:01"), tz = "UTC"),
confidence = c(100L, 99L),
type = c("WALKING", "STILL"),
),
tibble::tibble(
measurement_id = character(0),
time = structure(numeric(0), tzone = "UTC", class = c("POSIXct", "POSIXt")),
confidence = integer(0),
type = character(0)
)
)
)
expect_equal(res, true)
# Check with external data
res <- link_db(db, "Activity", external = dat1, offset_after = 1800)
true <- tibble::tibble(
dat1,
data = list(
tibble::tibble(
measurement_id = character(0),
time = structure(numeric(0), tzone = "UTC", class = c("POSIXct", "POSIXt")),
confidence = integer(0L),
type = character(0)
),
tibble::tibble(
measurement_id = c(
"ef96364c-d1f4-5f73-ce40-277f078e3d0f",
"5ba54e77-4bcf-c8d1-17ff-71b9ed908897"
),
time = as.POSIXct(c("2021-11-14 14:00:00", "2021-11-14 14:00:01"), tz = "UTC"),
confidence = c(100L, 99L),
type = c("WALKING", "STILL")
),
tibble::tibble(
measurement_id = character(0),
time = structure(numeric(0), tzone = "UTC", class = c("POSIXct", "POSIXt")),
confidence = integer(0),
type = character(0)
)
)
)
expect_equal(res, true)
# Argument checks
expect_error(
link_db(db, "Activity", "Bluetooth", offset_before = 1800, external = dat1),
"Either a second sensor or an external data frame must be supplied."
)
expect_error(
link_db(db, "Activity", offset_before = 1800),
"Either a second sensor or an external data frame must be supplied."
)
# Check time zone differences
dat1$time <- .POSIXct(dat1$time, tz = "Europe/Brussels")
expect_warning(
link_db(db, "Activity", external = dat1, offset_after = 1800),
"`external` is not using UTC as a time zone, unlike the data in the database."
)
dat1$time <- .POSIXct(dat1$time, tz = "UTC")
dbDisconnect(db)
# Check if ignore_large works
filename <- tempfile("big", fileext = ".db")
db <- create_db(NULL, filename)
# Populate database
add_study(db, study_id = "test-study", data_format = "CARP")
add_participant(db, participant_id = "12345", study_id = "test-study")
sens_value <- seq.int(0, 10, length.out = 50001)
time_value <- seq.POSIXt(as.POSIXct("2021-11-14 14:00:00.000", format = "%F %H:%M:%OS"),
by = "sec",
length.out = 50001
)
acc <- data.frame(
measurement_id = paste0("id_", 1:50001),
participant_id = "12345",
date = "2021-11-14",
time = strftime(time_value, format = "%H:%M:%OS3"),
x = sens_value,
y = sens_value,
z = sens_value
)
DBI::dbWriteTable(db, "Accelerometer", acc, overwrite = TRUE)
DBI::dbWriteTable(db, "Gyroscope", acc, overwrite = TRUE)
expect_error(
link_db(db, "Accelerometer", "Gyroscope", offset_after = 30),
"the total number of rows is higher than 100000. Use ignore_large = TRUE to continue"
)
expect_error(
link_db(db, "Accelerometer", "Gyroscope", offset_after = 30, ignore_large = TRUE),
"`x` and `y` are identical"
)
# Cleanup
dbDisconnect(db)
file.remove(filename)
})
## link_gaps =================
test_that("link_gaps", {
dat1 <- data.frame(
participant_id = c(rep("12345", 6), rep("23456", 6)),
time = rep(seq.POSIXt(as.POSIXct("2021-11-14 13:00:00"), by = "1 hour", length.out = 6), 2),
item_one = rep(seq.int(10, by = 10, length.out = 6), 2)
)
# Test with two participants to ensure link takes different groups into account
# Test both before and after each beep
# 1. the gap falls completely inside the beep interval
# 2. the start of the gap falls inside the beep interval, but the end does not
# 3. the start of the gap falls outside of the beep interval, but the end of the gap falls inside
# 4. the gap spans over the entire interval
# 5. the gap occurs entirely before the interval
# 6. the gap occurs entirely after the interval
dat2 <- data.frame(
participant_id = c(rep("12345", 46), rep("23456", 46)),
from = rep(c(
seq.POSIXt(as.POSIXct("2021-11-14 12:40:00"), by = "1 min", length.out = 20), # 1 before
seq.POSIXt(as.POSIXct("2021-11-14 13:10:00"), by = "1 min", length.out = 20), # 1 after
as.POSIXct(c(
"2021-11-14 13:55:00", # 2 before, 3 after
"2021-11-14 14:25:00", # 3 before, 2 after
"2021-11-14 15:25:00", # 2 after
"2021-11-14 15:30:00", # 4 before, after
"2021-11-14 12:15:00", # 5 before, after
"2021-11-14 18:35:00" # 6 before, after
))
), 2),
to = rep(c(
seq.POSIXt(as.POSIXct("2021-11-14 12:41:00"), by = "1 min", length.out = 20), # 1 before
seq.POSIXt(as.POSIXct("2021-11-14 13:11:00"), by = "1 min", length.out = 20), # 1 after
as.POSIXct(c(
"2021-11-14 14:05:00", # 2 before, 3 after
"2021-11-14 14:40:00", # 3 before
"2021-11-14 15:30:00", # 2 after
"2021-11-14 16:30:00", # 4 before, after
"2021-11-14 12:25:00", # 5 before, after
"2021-11-14 18:40:00" # 6 before, after
))
), 2)
)
# Test difference types of input for offset_before
# Integer vs double
expect_equal(
link_gaps(dat1, dat2, by = "participant_id", offset_before = 1800L),
link_gaps(dat1, dat2, by = "participant_id", offset_before = 1800)
)
expect_equal(
link_gaps(dat1, dat2, by = "participant_id", offset_before = 1800L),
link_gaps(dat1, dat2, by = "participant_id", offset_before = lubridate::minutes(30))
)
expect_equal(
link_gaps(dat1, dat2, by = "participant_id", offset_before = 1800L),
link_gaps(dat1, dat2, by = "participant_id", offset_before = "30 minutes")
)
expect_equal(
link_gaps(dat1, dat2, by = "participant_id", offset_before = 1800L),
link_gaps(dat1, dat2, by = "participant_id", offset_before = "1800 seconds")
)
# Offset after
expect_equal(
link_gaps(dat1, dat2, by = "participant_id", offset_after = 1800L),
link_gaps(dat1, dat2, by = "participant_id", offset_after = 1800)
)
expect_equal(
link_gaps(dat1, dat2, by = "participant_id", offset_after = 1800L),
link_gaps(dat1, dat2, by = "participant_id", offset_after = lubridate::minutes(30))
)
expect_equal(
link_gaps(dat1, dat2, by = "participant_id", offset_after = 1800L),
link_gaps(dat1, dat2, by = "participant_id", offset_after = "30 minutes")
)
expect_equal(
link_gaps(dat1, dat2, by = "participant_id", offset_after = 1800L),
link_gaps(dat1, dat2, by = "participant_id", offset_after = "1800 seconds")
)
# Offset_before, raw_data = TRUE
res_raw <- link_gaps(
data = dat1,
gaps = dat2,
by = "participant_id",
offset_before = 1800L,
offset_after = 0L,
raw_data = TRUE
)
true <- tibble::tibble(
participant_id = c(rep("12345", 6), rep("23456", 6)),
time = rep(seq.POSIXt(as.POSIXct("2021-11-14 13:00:00"), by = "1 hour", length.out = 6), 2),
item_one = rep(seq.int(10, by = 10, length.out = 6), 2),
gap_data = rep(list(
tibble::tibble(
from = seq.POSIXt(as.POSIXct("2021-11-14 12:40:00"), by = "1 min", length.out = 20),
to = seq.POSIXt(as.POSIXct("2021-11-14 12:41:00"), by = "1 min", length.out = 20),
gap = rep(60, 20)
), # 1
tibble::tibble(
from = as.POSIXct("2021-11-14 13:55:00"),
to = as.POSIXct("2021-11-14 14:00:00"),
gap = 300
), # 3
tibble::tibble(
from = as.POSIXct("2021-11-14 14:30:00"),
to = as.POSIXct("2021-11-14 14:40:00"),
gap = 600
), # 3
tibble::tibble(
from = as.POSIXct("2021-11-14 15:30:00"),
to = as.POSIXct("2021-11-14 16:00:00"),
gap = 1800
), # 4
tibble::tibble(
from = as.POSIXct(double(0), tz = ""),
to = as.POSIXct(double(0), tz = ""),
gap = integer(0)
), # 5
tibble::tibble(
from = as.POSIXct(double(0), tz = ""),
to = as.POSIXct(double(0), tz = ""),
gap = integer(0)
) # 6
), 2),
gap = rep(c(1200, 300, 600, 1800, 0, 0), 2)
)
expect_equal(res_raw, true)
# Check accidental double link
expect_error(
link_gaps(
data = res_raw,
gaps = dat2,
by = "participant_id",
offset_before = 1800L,
raw_data = TRUE
),
"column 'gap' should not already be present in data"
)
expect_error(
link_gaps(
data = res_raw %>% dplyr::select(-gap),
gaps = dat2,
by = "participant_id",
offset_before = 1800L,
raw_data = TRUE
),
"column 'gap_data' should not already be present in data"
)
expect_error(
link_gaps(
data = res_raw %>% dplyr::select(-gap),
gaps = dat2,
by = "participant_id",
offset_before = 1800L,
raw_data = FALSE
),
NA
)
# Scrambled test
scramble <- function(data) {
idx <- sample(seq_along(data[, 1]), nrow(data))
data[idx, ]
}
res <- link_gaps(
data = scramble(dat1),
gaps = scramble(dat2),
by = "participant_id",
offset_before = 1800,
raw_data = TRUE
) %>%
arrange(participant_id, time)
expect_equal(res_raw, true)
# offset_before, raw_data = FALSE
res <- link_gaps(
data = dat1,
gaps = dat2,
by = "participant_id",
offset_before = 1800L,
offset_after = 0L
)
true <- tibble::tibble(
participant_id = c(rep("12345", 6), rep("23456", 6)),
time = rep(seq.POSIXt(as.POSIXct("2021-11-14 13:00:00"), by = "1 hour", length.out = 6), 2),
item_one = rep(seq.int(10, by = 10, length.out = 6), 2),
gap = rep(c(1200, 300, 600, 1800, 0, 0), 2)
)
expect_equal(res, true)
# Test whether results from raw_data = FALSE and TRUE are the same
expect_equal(
res_raw %>%
mutate(gap = purrr::map_dbl(gap_data, ~ sum(.x$gap))) %>%
dplyr::select(-gap_data),
res
)
# Offset_after, raw_data = TRUE
res_raw <- link_gaps(
data = dat1,
gaps = dat2,
by = "participant_id",
offset_before = 0L,
offset_after = 1800L,
raw_data = TRUE
)
true <- tibble::tibble(
participant_id = c(rep("12345", 6), rep("23456", 6)),
time = rep(seq.POSIXt(as.POSIXct("2021-11-14 13:00:00"), by = "1 hour", length.out = 6), 2),
item_one = rep(seq.int(10, by = 10, length.out = 6), 2),
gap_data = rep(list(
tibble::tibble(
from = seq.POSIXt(as.POSIXct("2021-11-14 13:10:00"), by = "1 min", length.out = 20),
to = seq.POSIXt(as.POSIXct("2021-11-14 13:11:00"), by = "1 min", length.out = 20),
gap = rep(60, 20)
), # 1
tibble::tibble(
from = c(as.POSIXct("2021-11-14 14:00:00"), as.POSIXct("2021-11-14 14:25:00")),
to = c(as.POSIXct("2021-11-14 14:05:00"), as.POSIXct("2021-11-14 14:30:00")),
gap = 300
), # 2
tibble::tibble(
from = as.POSIXct("2021-11-14 15:25:00"),
to = as.POSIXct("2021-11-14 15:30:00"),
gap = 300
), # 2
tibble::tibble(
from = as.POSIXct("2021-11-14 16:00:00"),
to = as.POSIXct("2021-11-14 16:30:00"),
gap = 1800
), # 4
tibble::tibble(
from = as.POSIXct(double(0), tz = ""),
to = as.POSIXct(double(0), tz = ""),
gap = integer(0)
), # 5
tibble::tibble(
from = as.POSIXct(double(0), tz = ""),
to = as.POSIXct(double(0), tz = ""),
gap = integer(0)
) # 6
), 2),
gap = rep(c(1200, 600, 300, 1800, 0, 0), 2)
)
expect_equal(res_raw, true)
# offset_after, raw_data = FALSE
res <- link_gaps(
data = dat1,
gaps = dat2,
by = "participant_id",
offset_before = 0L,
offset_after = 1800L
)
true <- tibble::tibble(
participant_id = c(rep("12345", 6), rep("23456", 6)),
time = rep(seq.POSIXt(as.POSIXct("2021-11-14 13:00:00"), by = "1 hour", length.out = 6), 2),
item_one = rep(seq.int(10, by = 10, length.out = 6), 2),
gap = rep(c(1200, 600, 300, 1800, 0, 0), 2)
)
expect_equal(res, true)
# Test whether results from raw_data = FALSE and TRUE are the same
expect_equal(
res_raw %>%
mutate(gap = purrr::map_dbl(gap_data, ~ sum(.x$gap))) %>%
dplyr::select(-gap_data),
res
)
# Offset both
res_raw <- link_gaps(
data = dat1,
gaps = dat2,
by = "participant_id",
offset_before = 1800L,
offset_after = 1800L,
raw_data = TRUE
)
true <- tibble::tibble(
participant_id = c(rep("12345", 6), rep("23456", 6)),
time = rep(seq.POSIXt(as.POSIXct("2021-11-14 13:00:00"), by = "1 hour", length.out = 6), 2),
item_one = rep(seq.int(10, by = 10, length.out = 6), 2),
gap_data = rep(list(
tibble::tibble(
from = c(
seq.POSIXt(as.POSIXct("2021-11-14 12:40:00"), by = "1 min", length.out = 20),
seq.POSIXt(as.POSIXct("2021-11-14 13:10:00"), by = "1 min", length.out = 20)
),
to = c(
seq.POSIXt(as.POSIXct("2021-11-14 12:41:00"), by = "1 min", length.out = 20),
seq.POSIXt(as.POSIXct("2021-11-14 13:11:00"), by = "1 min", length.out = 20)
),
gap = rep(60, 40)
), # 1
tibble::tibble(
from = c(as.POSIXct("2021-11-14 13:55:00"), as.POSIXct("2021-11-14 14:25:00")),
to = c(as.POSIXct("2021-11-14 14:05:00"), as.POSIXct("2021-11-14 14:30:00")),
gap = c(600, 300)
), # 2
tibble::tibble(
from = c(as.POSIXct("2021-11-14 14:30:00"), as.POSIXct("2021-11-14 15:25:00")),
to = c(as.POSIXct("2021-11-14 14:40:00"), as.POSIXct("2021-11-14 15:30:00")),
gap = c(600, 300)
), # 2
tibble::tibble(
from = as.POSIXct("2021-11-14 15:30:00"),
to = as.POSIXct("2021-11-14 16:30:00"),
gap = 3600
), # 4
tibble::tibble(
from = as.POSIXct(double(0), tz = ""),
to = as.POSIXct(double(0), tz = ""),
gap = integer(0)
), # 5
tibble::tibble(
from = as.POSIXct(double(0), tz = ""),
to = as.POSIXct(double(0), tz = ""),
gap = integer(0)
) # 6
), 2),
gap = rep(c(2400, 900, 900, 3600, 0, 0), 2)
)
expect_equal(res_raw, true)
# offset_both
res <- link_gaps(
data = dat1,
gaps = dat2,
by = "participant_id",
offset_before = 1800L,
offset_after = 1800L
)
true <- tibble::tibble(
participant_id = c(rep("12345", 6), rep("23456", 6)),
time = rep(seq.POSIXt(as.POSIXct("2021-11-14 13:00:00"), by = "1 hour", length.out = 6), 2),
item_one = rep(seq.int(10, by = 10, length.out = 6), 2),
gap = rep(c(2400, 900, 900, 3600, 0, 0), 2)
)
expect_equal(res, true)
# Test whether results from raw_data = FALSE and TRUE are the same
expect_equal(
res_raw %>%
mutate(gap = purrr::map_dbl(gap_data, ~ sum(.x$gap))) %>%
dplyr::select(-gap_data),
res
)
# Argument checks
expect_error(
link_gaps(
data = dat1[, -2],
gaps = dat2,
by = "participant_id",
offset_after = 1800
),
"Column `time` must be present in `data`"
)
expect_error(
link_gaps(
data = dat1,
gaps = dat2[, -2],
by = "participant_id",
offset_after = 1800
),
"Column `from` and `to` must be present in `gaps`."
)
expect_error(
link_gaps(
data = mutate(dat1, time = as.character(time)),
gaps = dat2,
by = "participant_id",
offset_after = 1800
),
"Column `time` in `data` must be a POSIXct."
)
})
## bin_data =================
test_that("bin_data", {
data <- tibble::tibble(
participant_id = 1,
datetime = c(
"2022-06-21 15:00:00", "2022-06-21 15:55:00",
"2022-06-21 17:05:00", "2022-06-21 17:10:00"
),
confidence = 100,
type = "WALKING"
)
# get bins per hour, even if the interval is longer than one hour
res <- data %>%
mutate(datetime = as.POSIXct(datetime)) %>%
mutate(lead = lead(datetime)) %>%
bin_data(
start_time = datetime,
end_time = lead,
by = "hour"
)
true <- tibble::tibble(
bin = as.POSIXct(c("2022-06-21 15:00:00", "2022-06-21 16:00:00", "2022-06-21 17:00:00")),
bin_data = list(
tibble::tibble(
participant_id = 1,
datetime = as.POSIXct(c("2022-06-21 15:00:00", "2022-06-21 15:55:00")),
confidence = 100,
type = "WALKING",
lead = as.POSIXct(c("2022-06-21 15:55:00", "2022-06-21 16:00:00"))
),
tibble::tibble(
participant_id = 1,
datetime = as.POSIXct(c("2022-06-21 16:00:00")),
confidence = 100,
type = "WALKING",
lead = as.POSIXct("2022-06-21 17:00:00")
),
tibble::tibble(
participant_id = 1,
datetime = as.POSIXct(c(
"2022-06-21 17:00:00", "2022-06-21 17:05:00",
"2022-06-21 17:10:00"
)),
confidence = 100,
type = "WALKING",
lead = as.POSIXct(c("2022-06-21 17:05:00", "2022-06-21 17:10:00", NA))
)
)
)
expect_equal(res, true)
# Alternatively, you can give an integer value to by to create custom-sized
# bins, but only if fixed = FALSE. Not that these bins are not rounded to,
# as in this example 30 minutes, but rather depends on the earliest time
# in the group.
res <- data %>%
mutate(datetime = as.POSIXct(datetime)) %>%
mutate(lead = lead(datetime)) %>%
bin_data(
start_time = datetime,
end_time = lead,
by = 1800L,
fixed = FALSE
)
true <- tibble::tibble(
bin = as.POSIXct(c(
"2022-06-21 15:00:00", "2022-06-21 15:30:00", "2022-06-21 16:00:00",
"2022-06-21 16:30:00", "2022-06-21 17:00:00"
)),
bin_data = list(
tibble::tibble(
participant_id = 1,
datetime = as.POSIXct(c("2022-06-21 15:00:00")),
confidence = 100,
type = "WALKING",
lead = as.POSIXct(c("2022-06-21 15:30:00"))
),
tibble::tibble(
participant_id = 1,
datetime = as.POSIXct(c("2022-06-21 15:30:00", "2022-06-21 15:55:00")),
confidence = 100,
type = "WALKING",
lead = as.POSIXct(c("2022-06-21 15:55:00", "2022-06-21 16:00:00"))
),
tibble::tibble(
participant_id = 1,
datetime = as.POSIXct(c("2022-06-21 16:00:00")),
confidence = 100,
type = "WALKING",
lead = as.POSIXct(c("2022-06-21 16:30:00"))
),
tibble::tibble(
participant_id = 1,
datetime = as.POSIXct(c("2022-06-21 16:30:00")),
confidence = 100,
type = "WALKING",
lead = as.POSIXct(c("2022-06-21 17:00:00"))
),
tibble::tibble(
participant_id = 1,
datetime = as.POSIXct(c(
"2022-06-21 17:00:00", "2022-06-21 17:05:00",
"2022-06-21 17:10:00"
)),
confidence = 100,
type = "WALKING",
lead = as.POSIXct(c("2022-06-21 17:05:00", "2022-06-21 17:10:00", NA))
)
)
)
expect_equal(res, true)
# More complicated data for showcasing grouping:
data <- tibble::tibble(
participant_id = c(rep(1, 4), rep(2, 4)),
datetime = rep(c(
"2022-06-21 15:00:00", "2022-06-21 15:55:00",
"2022-06-21 17:05:00", "2022-06-21 17:10:00"
), 2),
confidence = 100,
type = rep(c("STILL", "WALKING", "STILL", "WALKING"), 2)
)
# binned_intervals also takes into account the prior grouping structure
res <- data %>%
mutate(datetime = as.POSIXct(datetime)) %>%
group_by(participant_id) %>%
mutate(lead = lead(datetime)) %>%
group_by(participant_id, type) %>%
bin_data(
start_time = datetime,
end_time = lead,
by = "hour"
)
true <- tibble::tibble(
participant_id = c(rep(1, 6), rep(2, 6)),
type = rep(c("STILL", "STILL", "STILL", "WALKING", "WALKING", "WALKING"), 2),
bin = rep(as.POSIXct(c(
"2022-06-21 15:00:00", "2022-06-21 16:00:00",
"2022-06-21 17:00:00"
)), 4),
bin_data = rep(list(
# STILL
tibble::tibble(
datetime = as.POSIXct(c("2022-06-21 15:00:00")),
confidence = 100,
lead = as.POSIXct(c("2022-06-21 15:55:00"))
),
tibble::tibble(
datetime = as.POSIXct(double(0)),
confidence = double(0),
lead = as.POSIXct(double(0))
),
tibble::tibble(
datetime = as.POSIXct(c("2022-06-21 17:05:00")),
confidence = 100,
lead = as.POSIXct(c("2022-06-21 17:10:00"))
),
# WALKING
tibble::tibble(
datetime = as.POSIXct(c("2022-06-21 15:55:00")),
confidence = 100,
lead = as.POSIXct(c("2022-06-21 16:00:00"))
),
tibble::tibble(
datetime = as.POSIXct(c("2022-06-21 16:00:00")),
confidence = 100,
lead = as.POSIXct(c("2022-06-21 17:00:00"))
),
tibble::tibble(
datetime = as.POSIXct(c("2022-06-21 17:00:00", "2022-06-21 17:10:00")),
confidence = 100,
lead = as.POSIXct(c("2022-06-21 17:05:00", NA))
)
), 2)
) %>%
group_by(participant_id, type)
expect_equal(res, true)
# To get the duration for each bin (note to change the variable names in sum):
duration <- purrr::map_dbl(
.x = res$bin_data,
.f = ~ sum(as.double(.x$lead) - as.double(.x$datetime),
na.rm = TRUE
) / 60
)
# Or:
duration2 <- res %>%
unnest(bin_data, keep_empty = TRUE) %>%
mutate(duration = .data$lead - .data$datetime) %>%
group_by(bin, .add = TRUE) %>%
summarise(duration = sum(.data$duration, na.rm = TRUE), .groups = "drop")
true <- c(55, 0, 5, 5, 60, 5, 55, 0, 5, 5, 60, 5)
expect_equal(duration, as.double(duration2$duration))
expect_equal(duration, true)
expect_equal(as.double(duration2$duration), true)
# Argument checks
data <- data %>%
mutate(datetime = as.POSIXct(datetime)) %>%
mutate(lead = lead(datetime))
expect_error(
bin_data(
data = data,
start_time = datetime,
end_time = lead,
by = TRUE
),
"`by` must be one of 'sec', 'min', 'hour', or 'day', or a numeric value if `fixed = FALSE`."
)
expect_error(
data %>%
mutate(datetime = as.character(datetime)) %>%
bin_data(
start_time = datetime,
end_time = lead,
by = "hour"
),
NA
)
# Test bug #8: bin_data() incorrectly rounded off days after DST change
data <- tibble::tibble(
participant_id = 1,
datetime = as.POSIXct(
c(
"2022-10-30 15:00:00", "2022-10-30 15:55:00",
"2022-10-31 17:05:00", "2022-10-31 17:10:00"
),
tz = "Europe/Brussels"
),
confidence = 100,
type = "WALKING"
)
res <- data %>%
mutate(datetime = as.POSIXct(datetime)) %>%
mutate(lead = lead(datetime)) %>%
bin_data(
start_time = datetime,
end_time = lead,
by = "day"
)
expect_equal(lubridate::hour(res$bin), c(0, 0))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.