test-remove_partial_data.R
In LightLogR: Process Data from Wearable Light Loggers and Optical Radiation Dosimeters

test_that("Input validation for remove_partial_data", {
  data_ok <- tibble::tibble(
    Id = "A",
    Datetime = lubridate::as_datetime(0) + lubridate::hours(0:2),
    Value = 1:3
  ) %>% dplyr::group_by(Id)
  
  expect_error(remove_partial_data(list()), "dataset must be a data frame")
  expect_error(remove_partial_data(data_ok, Variable.colname = NonExistent),
               "Variable.colname 'NonExistent' not found in dataset")
  expect_error(remove_partial_data(data_ok, Datetime.colname = NonExistentDT),
               "Datetime.colname 'NonExistentDT' not found in dataset")
  
  data_bad_dt_type <- data_ok %>% dplyr::mutate(Datetime = as.character(Datetime))
  expect_error(remove_partial_data(data_bad_dt_type),
               "Datetime.colname 'Datetime' must be of type POSIXct")
  
  expect_error(remove_partial_data(data_ok, threshold.missing = -0.1),
               "threshold.missing as percentage must be between 0 and 1")
  expect_error(remove_partial_data(data_ok, threshold.missing = 1.1),
               "threshold.missing as percentage must be between 0 and 1")
  
  expect_error(remove_partial_data(data_ok, show.result = "TRUE"),
               "show.result must be logical")
  expect_error(remove_partial_data(data_ok, handle.gaps = "FALSE"),
               "handle.gaps must be logical")
  expect_error(remove_partial_data(data_ok, by.date = "FALSE"),
               "by.date must be logical")
})

test_that("remove_partial_data with show.result = TRUE", {
  data_test <- tibble::tibble(
    Id = rep(c("X", "Y", "Z"), each = 10),
    Datetime = lubridate::as_datetime(0) + lubridate::minutes(rep(seq(0, 90, by = 10), 3)), # 10 pts, 9 intervals of 10min
    Value = c(rep(1, 2), rep(NA, 8),  # X: 8 NA -> 80min missing / 90min total = 0.888 miss_pct
              rep(1, 8), rep(NA, 2),  # Y: 2 NA -> 20min missing / 90min total = 0.222 miss_pct
              rep(1, 10))             # Z: 0 NA -> 0min missing / 90min total = 0 miss_pct
  ) %>% dplyr::group_by(Id)
  
  # Percentage threshold
  res_pct <- remove_partial_data(data_test, Value, threshold.missing = 0.5, show.result = TRUE, handle.gaps = FALSE)
  expect_true(is.data.frame(res_pct))
  expect_true("marked.for.removal" %in% names(res_pct))
  expect_equal(res_pct$marked.for.removal, c(TRUE, FALSE, FALSE)) # X removed, Y Z kept
  expect_equal(res_pct$Id, c("X", "Y", "Z"))
  
  # Duration threshold
  res_dur <- remove_partial_data(data_test, Value, threshold.missing = "30 min", show.result = TRUE, handle.gaps = FALSE)
  # X: 80min missing > 30min. Y: 20min missing < 30min. Z: 0min missing < 30min
  expect_equal(res_dur$marked.for.removal, c(TRUE, FALSE, FALSE))
  
  # handle.gaps = TRUE
  data_implicit_gaps <- tibble::tibble(
    Id = "P",
    Datetime = lubridate::as_datetime(0) + lubridate::hours(c(0, 1, 3, 4)), # implicit gap at 2h
    Value = c(1, 2, NA, 5) # explicit NA at 3h (becomes 2h after gap filling)
  ) %>% dplyr::group_by(Id)
  # gap_handler(full.days=T) on P: DTs 0,1,2,3,4. Values: 1,2,NA(implicit),NA(original),5
  # Total 5 points, 4 intervals.
  # Missing values at DT=2h (implicit) and DT=3h (original).
  # Missing duration is 2 hours. Total duration is 4 hours. missing_pct = 0.5
  res_hg <- remove_partial_data(data_implicit_gaps, Value, threshold.missing = 0.4, show.result = TRUE, handle.gaps = TRUE)
  expect_true(res_hg$marked.for.removal) # 0.5 > 0.4
  
  # by.date = TRUE
  data_daily <- tibble::tibble(
    Id = "A",
    Datetime = lubridate::as_datetime(0) + lubridate::dhours(c(0:2, 24:25)), # Day1: 0-2h (3pts, 2 int), Day2: 24-25h (2pts, 1 int)
    Value = c(1, NA, 3, NA, 5) # Day1: 1 NA at 1h. Day2: 1 NA at 24h
  ) %>% dplyr::group_by(Id)
  # Day1 (date 0): DTs 0,1,2. Vals 1,NA,3. missing 1h, total 2h. pct 0.33
  # Day2 (date 1): DTs 24,25. Vals NA,5. missing 1h, total 1h. pct 0.5
  res_bd <- remove_partial_data(data_daily, Value, threshold.missing = 0.4, show.result = TRUE, by.date = TRUE, handle.gaps = FALSE)
  expect_equal(nrow(res_bd), 2) # One row per Id-Date group
  expect_equal(res_bd$marked.for.removal, c(FALSE, TRUE)) # Day1 kept (0.5 < 0.6), Day2 removed (1.0 > 0.6)
  expect_true(".date" %in% names(res_bd))
  
  # Single point group (marked.for.removal should be TRUE due to NA)
  data_single_pt <- tibble::tibble(Id = "S", Datetime = lubridate::now(), Value = 1) %>% dplyr::group_by(Id)
  
  expect_message(
  expect_message(
    res_sng <- remove_partial_data(data_single_pt, Value, threshold.missing = 0.1, show.result = TRUE)
    )
  )
  expect_true(res_sng$marked.for.removal)
})

test_that("remove_partial_data with show.result = FALSE (dataset filtering)", {
  data_test <- tibble::tibble(
    Id = rep(c("X", "Y", "Z"), each = 10),
    Datetime = lubridate::as_datetime(0) + lubridate::minutes(rep(seq(0, 90, by = 10), 3)),
    Value = c(rep(1, 2), rep(NA, 8), rep(1, 8), rep(NA, 2), rep(1, 10))
  ) %>% dplyr::group_by(Id) # X: 88.8% miss, Y: 22.2% miss, Z: 0% miss
  
  # Percentage threshold
  res_pct_filt <- remove_partial_data(data_test, Value, threshold.missing = 0.5, show.result = FALSE)
  expect_equal(unique(res_pct_filt$Id), c("Y", "Z"))
  expect_equal(nrow(res_pct_filt), 20) # Y and Z groups, 10 rows each
  
  # Duration threshold
  res_dur_filt <- remove_partial_data(data_test, Value, threshold.missing = "30 min", show.result = FALSE)
  expect_equal(unique(res_dur_filt$Id), c("Y", "Z")) # X (80min missing) removed
  
  # No groups removed
  res_none_removed <- remove_partial_data(data_test, Value, threshold.missing = 0.9, show.result = FALSE)
  expect_equal(nrow(res_none_removed), 30)
  expect_equal(unique(res_none_removed$Id), c("X", "Y", "Z"))
  
  # All groups removed
  data_all_bad <- tibble::tibble(Id = c("U", "V"), Datetime = lubridate::now(), Value = c(NA, NA)) %>% dplyr::group_by(Id)
  # Each group is a single NA point, will be marked for removal.
  expect_message(
  expect_message(
  expect_message(res_all_removed <- remove_partial_data(data_all_bad, Value, threshold.missing = 0.1, show.result = FALSE),
                 "No groups are left after removing insufficient groups"),
  "This dataset has irregular or singular data")
  )
  # Current behavior returns the summary table, not an empty dataset
  expect_true("marked.for.removal" %in% names(res_all_removed))
  expect_equal(nrow(res_all_removed), 2) # Summary table has 2 rows for U, V
  
  # handle.gaps = FALSE with messages
  data_implicit_gaps_msg <- tibble::tibble(Id = "P", Datetime = lubridate::as_datetime(0) + lubridate::hours(c(0, 2, 6)), Value = 1:3) %>% dplyr::group_by(Id)
  expect_message(remove_partial_data(data_implicit_gaps_msg, Value, handle.gaps = FALSE, show.result = FALSE),
                 "This dataset has implicit gaps.")
  
  data_irreg_msg <- tibble::tibble(Id = "Q", Datetime = lubridate::as_datetime(0) + lubridate::dhours(c(0, 1, 2, 2.5)), Value = 1:4) %>% dplyr::group_by(Id)
  expect_message(remove_partial_data(data_irreg_msg, Value, handle.gaps = FALSE, show.result = FALSE),
                 "This dataset has irregular or singular data.")
  
  # Variable.colname = Datetime (removes single point groups)
  data_single_mix <- tibble::tibble(
    Id = c("S1", "S2", "S2", "S3", "S3", "S3"),
    Datetime = lubridate::as_datetime(0) + lubridate::days(c(0, 10, 11, 20, 21, 22))
  ) %>% dplyr::group_by(Id) # S1 is single point
  expect_message(
    res_dt_var <- remove_partial_data(data_single_mix, Variable.colname = Datetime, threshold.missing = 0.1, show.result = FALSE, handle.gaps = FALSE),
    "This dataset has irregular or singular data")
  expect_equal(unique(res_dt_var$Id), c("S2", "S3"))
  expect_equal(nrow(res_dt_var), 5)
  
  # by.date = TRUE, show.result = FALSE
  data_daily_filt <- tibble::tibble(
    Id = "A",
    Datetime = lubridate::as_datetime(0) + lubridate::hours(c(0:2, 24:25)), # Day1: 0-2h (3pts), Day2: 24-25h (2pts)
    Value = c(1, 1, 3, NA, NA) # Day1: 0% NA. Day2: 100% NA (2 NAs / 2 total points)
  ) %>% dplyr::group_by(Id)
  # Day1: (date 0) DTs 0,1,2. Vals 1,1,3. missing 0h, total 2h. pct 0. Keep if thresh > 0.
  # Day2: (date 1) DTs 24,25. Vals NA,NA. missing 1h (1 interval), total 1h. pct 1.0. Remove if thresh < 1.
  res_bd_filt <- remove_partial_data(data_daily_filt, Value, threshold.missing = 0.6, show.result = FALSE, by.date = TRUE, handle.gaps = FALSE)
  expect_equal(nrow(res_bd_filt), 3) # Only Day1 data for Id A
  expect_equal(unique(lubridate::date(res_bd_filt$Datetime)), lubridate::date(lubridate::as_datetime(0)))
  expect_false(".date" %in% names(res_bd_filt))
  expect_equal(dplyr::group_vars(res_bd_filt), "Id") # Original grouping restored
})

test_that("remove_partial_data handles ungrouped data", {
  data_ungrouped <- tibble::tibble(
    Datetime = lubridate::as_datetime(0) + lubridate::hours(0:9),
    Value = c(rep(1,2), rep(NA,8)) # 80% missing
  )
  # Ungrouped data is treated as one large group
  res_filt <- remove_partial_data(data_ungrouped, Value, threshold.missing = 0.8, show.result = FALSE)
  expect_message(res_filt_msg <- remove_partial_data(data_ungrouped, Value, threshold.missing = 0.5, show.result = FALSE),
                 "No groups are left after removing insufficient groups")
  expect_true("marked.for.removal" %in% names(res_filt_msg)) # Returns summary
  
  res_kept <- remove_partial_data(data_ungrouped, Value, threshold.missing = 0.9, show.result = FALSE)
  expect_equal(nrow(res_kept), 10) # Kept as 0.8 < 0.9
})

test_that("remove_partial_data preserves original grouping if not by.date", {
  data_grp <- tibble::tibble(
    GrpCol = rep(c("G1", "G2"), each = 10),
    Id = rep(c("X", "Y"), each = 5, times = 2),
    Datetime = lubridate::as_datetime(0) + lubridate::minutes(rep(seq(0, 40, by = 10), 4)),
    Value = c(rep(1,10), rep(c(1,NA,NA,NA,NA),2)) # G1/X, G1/Y ok. G2/X, G2/Y 80% NA for Value
  ) %>% dplyr::group_by(GrpCol, Id)
  
  # Threshold 0.5. G2 groups (X, Y) should be removed.
  res <- remove_partial_data(data_grp, Value, threshold.missing = 0.5, show.result = FALSE)
  expect_equal(dplyr::group_vars(res), c("GrpCol", "Id"))
  expect_equal(unique(res$GrpCol), "G1")
  expect_equal(nrow(res), 10)
})

test_that("remove_partial_data handles explicit NAs in Variable.colname correctly with handle.gaps = TRUE", {
  # Test case from a user finding where NAs in Value column + handle.gaps = TRUE caused issue
  # with how 'missing' and 'total' were calculated by durations.
  # Durations calculates missing based on NAs in Variable.colname over intervals defined by Datetime.
  # gap_handler(full.days=TRUE) ensures Datetime sequence is dense.
  dataset <- tibble::tibble(
    Id = "Bird1",
    Datetime = lubridate::ymd_hms("2023-01-01 00:00:00") + lubridate::hours(0:5),
    Light = c(10, 20, NA, NA, 5, 1) # 2 NAs out of 6 points (5 intervals)
  ) %>% dplyr::group_by(Id)
  
  # handle.gaps=TRUE, full.days=TRUE for gap_handler
  # Datetime: 0,1,2,3,4,5. Light: 10,20,NA,NA,5,1
  # Epoch determined by dominant_epoch (1 hour).
  # gap_handler will make Datetime 00:00 to 23:00 for that day.
  # Original data 0-5h. So 6 points. Implicit NAs for Light from 6h to 23h (18 points).
  # Total points after gap_handler = 24. Total intervals = 23.
  # Explicit NAs: 2 (at 2h, 3h). Implicit NAs: 18 (from 6h to 23h). Total NAs = 20.
  # Missing duration = 20 * 1hour = 20 hours. Total duration = 23 hours.
  # Missing_pct = 20/23 approx 0.869
  
  # If threshold.missing = 0.5, Bird1 should be removed.
  res_show_true <- remove_partial_data(dataset, Light, threshold.missing = 0.5, handle.gaps = TRUE, show.result = TRUE)
  expect_true(res_show_true$marked.for.removal)
  expect_equal(res_show_true$missing, lubridate::duration(20, "hours"))
  expect_equal(res_show_true$total, lubridate::duration(24, "hours"))
  
  expect_message(res_show_false <- remove_partial_data(dataset, Light, threshold.missing = 0.5, handle.gaps = TRUE, show.result = FALSE))
  # Returns summary table
  expect_true("marked.for.removal" %in% names(res_show_false))
  
  # If threshold.missing = 0.9, Bird1 should be kept.
  res_kept <- remove_partial_data(dataset, Light, threshold.missing = 0.9, handle.gaps = TRUE, show.result = FALSE)
  expect_equal(nrow(res_kept), 6) # Original 6 rows are kept
  expect_equal(unique(res_kept$Id), "Bird1")
})

Any scripts or data that you put into this service are public.

LightLogR documentation built on June 10, 2025, 5:12 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

LightLogR
Process Data from Wearable Light Loggers and Optical Radiation Dosimeters

tests/testthat/test-remove_partial_data.R
In LightLogR: Process Data from Wearable Light Loggers and Optical Radiation Dosimeters

Try the LightLogR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

LightLogR Process Data from Wearable Light Loggers and Optical Radiation Dosimeters

tests/testthat/test-remove_partial_data.R In LightLogR: Process Data from Wearable Light Loggers and Optical Radiation Dosimeters

Try the LightLogR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

LightLogR
Process Data from Wearable Light Loggers and Optical Radiation Dosimeters

tests/testthat/test-remove_partial_data.R
In LightLogR: Process Data from Wearable Light Loggers and Optical Radiation Dosimeters