test_smoothingfunctions.R
In gcplyr: Wrangle and Analyze Growth Curve Data

library(testthat)
library(gcplyr)

test_that("Moving average returns correctly", {
  set.seed(1)
  data <- data.frame("time" = 1:100,
                     "dens" = 10/(1+exp(-.1*((1:100) - 50))) + 
                       rnorm(100, sd = 0.5))
  
  expect_error(moving_average(formula = dens ~ time + treat,
                              data = data, window_width_n = 3))
  expect_error(moving_average(formula = ~ time,
                              data = data, window_width_n = 3))
  expect_error(moving_average(formula = dens ~ lime,
                              data = data, window_width_n = 3))
  expect_error(moving_average(formula = lens ~ time,
                              data = data, window_width_n = 3))
  expect_error(moving_average(formula = dens ~ time,
                              data = data, window_width_n = 4))
  expect_identical(moving_average(formula = dens ~ time,
                                  data = data, window_width = 0),
                   expected = data$dens)
  expect_identical(moving_average(formula = dens ~ time,
                                  data = data, window_width_n = 1),
                   expected = data$dens)
  
  manual_expect_win5 <- c(NA, NA, rep(0, (nrow(data)-4)), NA, NA)
  for (i in 3:98) {manual_expect_win5[i] <- mean(data$dens[(i-2):(i+2)])}
  expect_equal(moving_average(formula = dens ~ time,
                                  data = data, window_width_n = 5),
                   expected = manual_expect_win5)
  
  #Now test with out-of-order data
  data2 <- data.frame(x = c(6:9, 1:5), y = c(6:9, 1:5)**2)
  expect_equal(moving_average(y ~ x, data2, window_width_n = 3),
               expected = c((25+36+49)/3, (36+49+64)/3, (49+64+81)/3,
                            NA, NA, (1+4+9)/3, (4+9+16)/3, (9+16+25)/3,
                            (16+25+36)/3))
  
  #Now test with NA's and out-of-order data
  data3 <- data.frame(x = c(6:9,NA, 1:5), 
                      y = c(NA, c(7:9)**2, 50, c(1:5)**2))
  expect_equal(moving_average(y ~ x, data3, window_width_n = 3),
               expected = c(NA, (25+49+64)/3, (49+64+81)/3,
                            NA, NA, NA, (1+4+9)/3,
                            (4+9+16)/3, (9+16+25)/3, (16+25+49)/3))
})

test_that("Moving median returns correctly", {
  set.seed(1)
  data <- data.frame("time" = 1:100,
                     "dens" = 10/(1+exp(-.1*((1:100) - 50))) + 
                       rnorm(100, sd = 0.5))
  
  expect_error(moving_median(formula = dens ~ time + treat,
                              data = data, window_width_n = 3))
  expect_error(moving_median(formula = ~ time,
                              data = data, window_width_n = 3))
  expect_error(moving_median(formula = dens ~ lime,
                              data = data, window_width_n = 3))
  expect_error(moving_median(formula = lens ~ time,
                              data = data, window_width_n = 3))
  expect_error(moving_median(formula = dens ~ time,
                              data = data, window_width_n = 4))
  expect_identical(moving_median(formula = dens ~ time,
                                  data = data, window_width_n = 1),
                   expected = data$dens)
  
  manual_expect_win5 <- c(NA, NA, rep(0, (nrow(data)-4)), NA, NA)
  for (i in 3:98) {manual_expect_win5[i] <- median(data$dens[(i-2):(i+2)])}
  expect_equal(moving_median(formula = dens ~ time,
                              data = data, window_width_n = 5),
               expected = manual_expect_win5)
  
  #Now test with out-of-order data
  data2 <- data.frame(x = c(6:9, 1:5), y = c(6:9, 1:5)**2)
  expect_equal(moving_median(y ~ x, data2, window_width_n = 3),
               expected = c(36, 49, 64, NA, NA, 4, 9, 16, 25))
  
  #Now test with NA's and out-of-order data
  data3 <- data.frame(x = c(6:9,NA, 1:5), 
                      y = c(NA, c(7:9)**2, 50, c(1:5)**2))
  expect_equal(moving_median(y ~ x, data3, window_width_n = 3),
               expected = c(NA, 49, 64, NA, NA, NA, 4, 9, 16, 25))
})

test_that("smooth_data returns properly for moving-average", {
  library(dplyr)
  set.seed(1)
  data <- data.frame("time" = 1:100,
                     "dens" = 10/(1+exp(-.1*((1:100) - 50))) +
                       rnorm(100, sd = 0.5),
                     "grp" = rep("A", 100))
  manual_expect_win5 <- c(NA, NA, rep(0, (nrow(data)-4)), NA, NA)
  for (i in 3:98) {manual_expect_win5[i] <- mean(data$dens[(i-2):(i+2)])}
  temp <- mutate(group_by(data, grp),
                        sm = smooth_data(x = time, y = dens,
                           sm_method = "moving-average",
                           window_width_n = 5))
  expect_equal(temp$sm, expected = manual_expect_win5)
  data2 <- data.frame("time" = c(1:100, 1:100),
                      "dens" = c(data$dens, data$dens + 10),
                      "treat" = rep(c("A", "B"), each = 100))
  expect_equal(smooth_data(x = data2$time, y = data2$dens,
                           sm_method = "moving-average",
                           subset_by = data2$treat,
                           window_width_n = 5),
               expected = c(manual_expect_win5, manual_expect_win5 + 10))
})

test_that("smooth_data returns properly for loess", {
  library(dplyr)
  set.seed(1)
  data <- data.frame("time" = 1:100,
                     "dens" = 10/(1+exp(-.1*((1:100) - 50))) +
                       rnorm(100, sd = 0.5),
                     "grp" = rep("A", 100))
  temp <- mutate(group_by(data, grp),
                 sm = smooth_data(x = time, y = dens,
                sm_method = "loess", span = 0.5))
  expect_equal(temp$sm, 
               expected = loess(dens ~ time, data, span = 0.5, 
                     na.action = "na.exclude")$fitted)
  
  #Now test with out-of-order data
  data2 <- data.frame(x = c(50:100, 1:49), y = sqrt(c(50:100, 1:49)),
                      grp = rep("A", 100))
  temp <- mutate(group_by(data2, grp),
                 sm = smooth_data(x = x, y = y,
                                  sm_method = "loess", span = 0.5))
  expect_equal(temp$sm,
               expected = loess(y ~ x, data2, span = 0.5,
                                na.action = "na.exclude")$fitted)
  
  #Now test with NA's and out-of-order data
  data3 <- data.frame(x = c(50:65, NA, 66:100, NA, 2:48, NA), 
                      y = c(NA, sqrt(50:75), NA, sqrt(76:100), sqrt(1:48)),
                      grp = rep("A", 101))
  expected3 <- predict(loess(y ~ x, data3, span = 0.5, na.action = "na.exclude"),
                       data3)
  names(expected3) <- NULL
  temp <- mutate(group_by(data3, grp),
                 sm = smooth_data(x = data3$x, y = data3$y,
                                  sm_method = "loess", span = 0.5))
  expect_equal(temp$sm, expected = expected3)
})

test_that("smooth_data returns properly for gam", {
  library(mgcv)
  library(dplyr)
  set.seed(1)
  data <- data.frame("time" = 1:100,
                     "dens" = 10/(1+exp(-.1*((1:100) - 50))) +
                       rnorm(100, sd = 0.5),
                     "grp" = rep("A", 100))
  expect1 <- predict(gam(formula = dens ~ s(time), data = data),
                     data)
  names(expect1) <- NULL
  dim(expect1) <- NULL
  temp <- mutate(group_by(data, grp),
                 sm = smooth_data(x = time, y = dens, 
                                  sm_method = "gam"))
  expect_equal(temp$sm, expected = expect1)

  data2 <- data.frame(x = c(50:100, 1:49), y = sqrt(c(50:100, 1:49)),
                      grp = rep("A", 100))
  expect2 <- predict(gam(formula = y ~ s(x), data = data2),
                     data2)
  names(expect2) <- NULL
  dim(expect2) <- NULL
  temp <- mutate(group_by(data2, grp),
                 sm = smooth_data(x = data2$x, y = data2$y, sm_method = "gam"))
  expect_equal(temp$sm, expected = expect2)
  
  #Now test with NA's and out-of-order data
  data3 <- data.frame(x = c(50:65, NA, 66:100, NA, 2:48, NA), 
                      y = c(NA, sqrt(50:75), NA, sqrt(76:100), sqrt(1:48)),
                      grp = rep("A", 101))
  expect3 <- predict(gam(formula = y ~ s(x), data = data3),
                     data3)
  names(expect3) <- NULL
  dim(expect3) <- NULL
  temp <- mutate(group_by(data3, grp),
                 sm = smooth_data(x = x, y = y, sm_method = "gam"))
  expect_equal(temp$sm, expected = expect3)
  
  #Now test when passing arguments for s() (e.g. k)
  expect4 <- predict(gam(formula = dens ~ s(time, k = 5), data = data),
                     data)
  names(expect4) <- NULL
  dim(expect4) <- NULL
  temp <- mutate(group_by(data, grp),
                 sm = smooth_data(x = time, y = dens, 
                                  sm_method = "gam", k = 5))
  expect_equal(temp$sm, expected = expect4)
  
  #& non-k args to s
  expect5 <-
    predict(gam(formula = dens ~ s(time, k = 5, bs = "cr"), data = data),
                     data)
  names(expect5) <- NULL
  dim(expect5) <- NULL
  temp <- mutate(group_by(data, grp),
                 sm = smooth_data(x = time, y = dens, sm_method = "gam",
                                  k = 5, bs = "cr"))
  expect_equal(temp$sm, expected = expect5)

  #& mix of args to s() and to gam()
  expect6 <-
    predict(gam(formula = dens ~ s(time, k = 5, bs = "cr"),
                data = data, subset = 1:90),
            data)
  names(expect6) <- NULL
  dim(expect6) <- NULL
  temp <- mutate(group_by(data, grp),
                 sm = smooth_data(x = time, y = dens, sm_method = "gam",
                                  k = 5, bs = "cr", subset = 1:90))
  expect_equal(temp$sm, expected = expect6)
})

test_that("smooth_data returns properly for smooth.spline", {
  add_nas <- function(x, nas_indices_removed) {
      return_indices <- 1:length(x)
      for (index in nas_indices_removed) {
        return_indices[return_indices >= index] <-
          return_indices[return_indices >= index] + 1
      }
      
      out <- rep(NA, length(x) + length(nas_indices_removed))
      out[return_indices] <- x
      
      return(out)
  }
  
  library(dplyr)
  set.seed(1)
  data <- data.frame("x" = 1:100,
                     "y" = 10/(1+exp(-.1*((1:100) - 50))) +
                       rnorm(100, sd = 0.5),
                     "grp" = rep("A", 100))
  expect1 <- stats::smooth.spline(x = data$x, y = data$y)$y
  temp <- mutate(group_by(data, grp),
                 sm = smooth_data(x = x, y = y, 
                                  sm_method = "smooth.spline"))
  expect_equal(temp$sm, expected = expect1)
  
  #Out of order data
  data2 <- data.frame(x = c(50:100, 1:49), y = sqrt(c(50:100, 1:49)),
                      grp = rep("A", 100))
  expect2 <- stats::smooth.spline(x = data2$x, y = data2$y)
  expect2$y <- expect2$y[match(data2$x, expect2$x)]
  temp <- mutate(group_by(data2, grp),
                 sm = smooth_data(x = x, y = y, sm_method = "smooth.spline"))
  expect_equal(temp$sm, expected = as.vector(expect2$y))
  
  #Now test with NA's and out-of-order data
  data3 <- data.frame(x = c(50:65, NA, 66:100, NA, 2:48, NA), 
                      y = c(NA, sqrt(50:75), NA, sqrt(76:100), sqrt(1:48)),
                      grp = rep("A", 101))
  expect3 <- 
    stats::smooth.spline(x = data3$x[!is.na(data3$x) & !is.na(data3$y)], 
                         y = data3$y[!is.na(data3$x) & !is.na(data3$y)])
  expect3$y <- expect3$y[match(data3$x, expect3$x)]
  temp <- mutate(group_by(data3, grp),
                 sm = smooth_data(x = x, y = y, sm_method = "smooth.spline"))
  expect_equal(temp$sm, expected = expect3$y)
  
  #Now test with duplicate x values
  data4 <- data.frame(x = c(50:100, 1:49, 50), y = sqrt(c(50:100, 1:49, 51)),
                      grp = rep("A", 101))
  expect4 <- 
    stats::smooth.spline(x = data4$x, y = data4$y)
  expect4$y <- expect4$y[match(data4$x, expect4$x)]
  temp <- mutate(group_by(data4, grp),
                 sm = smooth_data(x = x, y = y, sm_method = "smooth.spline"))
  expect_equal(temp$sm, expected = expect4$y)
  
  #Now test when passing arguments
  expect5 <- stats::smooth.spline(x = data$x, y = data$y, df = 7)$y
  temp <- mutate(group_by(data, grp),
                 sm = smooth_data(x = x, y = y, 
                                  sm_method = "smooth.spline", df = 7))
  expect_equal(temp$sm, expected = expect5)
  
  expect5 <- stats::smooth.spline(x = data$x, y = data$y, spar = 0.5)$y
  temp <- mutate(group_by(data, grp),
                 sm = smooth_data(x = x, y = y, 
                                  sm_method = "smooth.spline", spar = 0.5))
  expect_equal(temp$sm, expected = expect5)
})


test_that("smooth_data checks for grouping", {
  library(dplyr)
  mydf <- data.frame(x = 1:20, y = sqrt(1:20), 
                     grp = rep(c("A", "B"), each = 10))
  
  expect_warning(smooth_data(window_width_n = 5, x = mydf$x, y = mydf$y,
                             sm_method = 'moving-median'),
                 ".* called outside of dplyr::mutate and subset_by = NULL")
  expect_warning(mutate(mydf,
                        sm = smooth_data(window_width_n = 5, x = x, y = y, 
                                         sm_method = 'moving-median')),
                 ".* called on an ungrouped data.frame and subset_by = NULL")
  expect_no_warning(mutate(group_by(mydf, grp),
                           sm = smooth_data(window_width_n = 5, x = x, y = y, 
                                            sm_method = 'moving-median')))
})
  
test_that("predict_interpolation works correctly", {
  x <- 1:10
  y <- x**2
  
  expect_equal(predict_interpolation(x = x, y = y, newdata = c(2.5, 6.5)),
               c(6.5, 42.5))
  expect_equal(predict_interpolation(x = x, y = y, newdata = c(3, 5, 7)),
               c(3, 5, 7)**2)
  expect_equal(predict_interpolation(x = x, y = y, newdata = 0,
                                      extrapolate_predictions = FALSE),
               as.numeric(NA))
  expect_equal(predict_interpolation(x = x, y = y, newdata = 15,
                                      extrapolate_predictions = FALSE),
               as.numeric(NA))
  expect_equal(predict_interpolation(x = x, y = y, newdata = c(2.5, 5, 15),
                                      extrapolate_predictions = FALSE),
               c(6.5, 25, as.numeric(NA)))
  expect_equal(predict_interpolation(x = x, y = y, newdata = c(0, 11)),
               c(-2, 119))
  expect_equal(predict_interpolation(x = x, y = y, newdata = c(0, 0.5, 10.5, 11)),
               c(-2, -0.5, 109.5, 119))
})

test_that("train_smooth_data and train + makemethod_train_smooth_data match", {
  library(dplyr)
  library(caret)
  dat <- trans_wide_to_tidy(example_widedata, id_cols = "Time")

  ##Note: these tests were commented out because I didn't want to
  ## figure out how to set defaults that wouldn't raise warnings
  ## for having too small of spans
  # #Default conditions
  # for (mymethod in c("moving-average", "moving-median", "loess",
  #                    "gam", "smooth.spline")) {
  #   expect_equal(
  #     reframe(group_by(filter(dat, Well %in% c("A1", "A7")), Well),
  #             train = train_smooth_data(
  #               x = Time, y = Measurements,
  #               sm_method = mymethod,
  #               trControl = caret::trainControl(
  #                 method = "cv",
  #                 seeds = c(rep(list(c(1,1,1)), 26), 1)))),
  #     reframe(group_by(filter(dat, Well %in% c("A1", "A7")), Well),
  #             train = caret::train(
  #               x = data.frame(x = Time), y = Measurements,
  #               method = makemethod_train_smooth_data(sm_method = mymethod),
  #               trControl = caret::trainControl(
  #                 method = "cv",
  #                 seeds = c(rep(list(c(1,1,1)), 26), 1)))$results)
  #   )
  # }

  # #Specifying length
  # for (mymethod in c("moving-average", "moving-median", "loess",
  #                    "gam", "smooth.spline")) {
  #   expect_equal(
  #     reframe(group_by(filter(dat, Well %in% c("A1", "A7")), Well),
  #             train = train_smooth_data(
  #               x = Time, y = Measurements,
  #               sm_method = mymethod, tuneLength = 5,
  #               trControl = caret::trainControl(
  #                 method = "cv",
  #                 seeds = c(rep(list(c(1,1,1,1,1)), 26), 1)))),
  #     reframe(group_by(filter(dat, Well %in% c("A1", "A7")), Well),
  #             train = caret::train(
  #               x = data.frame(x = Time), y = Measurements,
  #               method = makemethod_train_smooth_data(sm_method = mymethod),
  #               tuneLength = 5,
  #               trControl = caret::trainControl(
  #                 method = "cv",
  #                 seeds = c(rep(list(c(1,1,1,1,1)), 26), 1)))$results)
  #   )
  # }

  methods <- c("moving-average", "moving-median", "loess",
               "gam", "smooth.spline")
  tuneGrids_list <- list(list("window_width_frac" = c(0.2, 0.4)),
                         list("window_width_frac" = c(0.2, 0.4)),
                         list("span" = c(0.5, 0.6)),
                         list("k" = c(7, 9)),
                         list("spar" = c(0.25, 0.5)))
  #Specifying parameter values
  for (i in 1:length(methods)) {
    mymethod <- methods[i]
    mytuneGrid <- tuneGrids_list[[i]]
    set.seed(1)
    actual <- reframe(group_by(filter(dat, Well %in% c("A1", "A7")), Well),
                      train = train_smooth_data(
                        x = Time, y = Measurements,
                        sm_method = mymethod,
                        tuneGrid = mytuneGrid,
                        trControl = caret::trainControl(
                          method = "cv",
                          seeds = c(rep(list(c(1,1,1)), 26), 1))))
    set.seed(1)
    expected <- reframe(group_by(filter(dat, Well %in% c("A1", "A7")), Well),
                        train = caret::train(
                          x = data.frame(x = Time), y = Measurements,
                          method = makemethod_train_smooth_data(
                            sm_method = mymethod, tuneGrid = as.data.frame(mytuneGrid)),
                          tuneGrid = as.data.frame(mytuneGrid),
                          trControl = caret::trainControl(
                            method = "cv",
                            seeds = c(rep(list(c(1,1,1)), 26), 1)))$results)
    
    expect_equal(actual, expected)
  }
})