tests/testthat/testCv.pcplus.R

compareImSy <- function(y, bandwidth) {
  n <- length(y)
  kernel <- function(x) ifelse(abs(x) <= 1 + 1e-15, 3 / 4 * (1 - x^2), 0)
  L <- max(0, min(as.integer(bandwidth * n + 1e-14), n - 1))
  K <- kernel(-L:L / (bandwidth * n))
  
  y - .kernelSmoothing(y, K)
}

compareImSX <- function(y, bandwidth) {
  n <- length(y)
  kernel <- function(x) ifelse(abs(x) <= 1 + 1e-15, 3 / 4 * (1 - x^2), 0)
  L <- max(0, min(as.integer(bandwidth * n + 1e-14), n - 1))
  K <- kernel(-L:L / (bandwidth * n))
  Matrix::bandSparse(n, n - 1, k = (-L + 1):L - 1, diagonals = .createImSX(n = n, K = rev(cumsum(K))))
}

compareXty <- function(y, bandwidth, x = NULL) {
  if (is.null(x)) {
    x <- compareImSX(y, bandwidth)
  }
  Matrix::t(x) %*% y / length(y)
}

compareXtX <- function(y, bandwidth, x = NULL) {
  if (is.null(x)) {
    x <- compareImSX(y, bandwidth)
  }
  Matrix::t(x) %*% x / length(y)
}

# y must be of even length
compareCv <- function(y, lambda, bandwidth, sd, thresh, maxit, loss = function(test, est) mean(abs(test - est))) {
  n <- length(y)
  
  if (n %% 2 == 1) {
    stop("n is odd")
  }
  
  V <- 2
  
  cv <- numeric(length(bandwidth))
  value <- list()
  
  
  for (indexBandwidth in seq(along = bandwidth)) {
    if (bandwidth[indexBandwidth] == Inf) {
      estRet <- numeric(n)
      
      for (i in 1:V) {
        J <- seq(i, n, V)
        mJ <- seq(along = y)[-J]
        
        pelt <- .pelt(obs = y[mJ], sd = sd)$est
        
        est <- numeric(n)
        est[1] <- pelt[1]
        est[mJ[-1]] <- diff(pelt)
        estRet[J] <- cumsum(est)[J]
      }
      
      cv[indexBandwidth] <- loss(test = y, est = estRet)
      value[[indexBandwidth]] <- NA
    } else {
      kernel <- function(x) ifelse(abs(x) <= 1 + 1e-15, 3 / 4 * (1 - x^2), 0)
      L <- max(0, min(as.integer(bandwidth[indexBandwidth] * n + 1e-14), n - 1))
      K <- kernel(-L:L / (bandwidth[indexBandwidth] * n))
      Kfold <- K
      Kfold[-L:L %% V == 0] <- 0
      
      precomputed <- .prepareImSXcv(K = K, Vm1 = V - 1, modValues = -L:L %% V)
      precomputed$val <- precomputed$val[, 1:max(precomputed$lengths), drop = FALSE]
      maxRight <- max(precomputed$lengthsRight)
      maxLeft <- max(precomputed$lengths - 1 - precomputed$lengthsRight)
      
      vec <- numeric(length(lambda))
      estRet <- matrix(0, n, length(lambda))
      
      for (i in 1:V) {
        J <- seq(i, n, V)
        mJ <- seq(along = y)[-J]
        rows <- rep_len(c((V - 1):1, (V - 1):1)[1:(V - 1) + (V - i)], length(mJ)) - 1
        
        ImSX <- Matrix::bandSparse(length(mJ), length(mJ), k = (-maxLeft + 1):maxRight,
                                   diagonals = .createImSXcv(n = length(mJ), precomputed = precomputed,
                                                             maxLeft = maxLeft, maxRight = maxRight, rows = rows))
        
        YmJ <- y
        YmJ[J] <- NA
        ImSy <- y[mJ] - .kernelSmoothingVfoldMJ(Y = YmJ, K = K, lengthMJ = length(mJ))
        
        ret <- glmnet::glmnet(x = ImSX[, -1], y = ImSy, lambda = lambda, standardize = FALSE, intercept = FALSE)
        
        
        for (indexLambda in seq(along = lambda)) {
          fit <- numeric(n)
          fit[mJ] <- c(0, as.numeric(ret$beta[, indexLambda]))
          fit <- cumsum(fit)
          YmJ <- y - fit
          YmJ[J] <- NA
          smooth <- .kernelSmoothingVfoldMJ(Y = YmJ, K = K, lengthMJ = length(mJ))
          
          # restimate cps by PELT
          pelt <- .pelt(obs = y[mJ] - smooth, sd = sd)
          
          # reestimate without penalty but condition on cps of PELT's estimate
          cps <- pelt$cps
          
          if (length(cps) > 0) {
            fitJ <- as.numeric(Matrix::solve(Matrix::t(ImSX[, cps, drop = FALSE]) %*% ImSX[, cps, drop = FALSE],
                                             as.numeric(Matrix::t(ImSX[, cps, drop = FALSE]) %*% ImSy)))
            fit <- numeric(n)
            fit[mJ][cps] <- fitJ
            fit <- cumsum(fit)
            
            estRet[J, indexLambda] <- fit[J] + .kernelSmoothingVfold(Y = y - fit, K = Kfold, V = V, fold = i)
          } else {
            estRet[J, indexLambda] <- .kernelSmoothingVfold(Y = y, K = Kfold, V = V, fold = i)
          }
        }
      }
      
      for (indexLambda in seq(along = lambda)) {
        vec[indexLambda] <- loss(test = y, est = estRet[, indexLambda])
      }
      
      index <- which.min(vec)
      cv[indexBandwidth] <- vec[index]
      value[[indexBandwidth]] <- lambda[1:index]
    }
  }
  
  index <- which.min(cv)
  list(lambda = value[[index]], bandwidth = bandwidth[index], cv = cv[index], cvs = cv, bandwidth = bandwidth)
}


test_that("cv.pcplus is giving correct results I", {
  testy <- rnorm(100)
  
  testbandwidth <- c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30)), Inf)
  testlambda <- c(10:1 * 10, 10:1) / 100
  testthresh <- 1e-7
  testmaxit <- 1e5
  testsd <- 1
  
  object <- cv.pcplus(y = testy, lambda = testlambda, bandwidth = testbandwidth, 
                      sd = testsd, thresh = testthresh, maxit = testmaxit)
  expected <- compareCv(testy, testlambda, testbandwidth, testsd, testthresh, testmaxit)
  expect_equal(object$cvs, expected$cvs, tolerance = 1e-1)
})

test_that("cv.pcplus is giving correct results II", {
  testy <- rnorm(100)
  testy[40:100] <- testy[40:100] + 3
  testy[70:100] <- testy[70:100] + 5
  testbandwidth <- c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30)), Inf)
  testlambda <- c(10:1 * 10, 10:1) / 100
  testthresh <- 1e-7
  testmaxit <- 1e5
  testsd <- 1
  
  object <- cv.pcplus(y = testy, lambda = testlambda, bandwidth = testbandwidth, 
                      sd = testsd, thresh = testthresh, maxit = testmaxit)
  expected <- compareCv(testy, testlambda, testbandwidth, testsd, testthresh, testmaxit)
  expect_equal(object$cvs, expected$cvs, tolerance = 1e-1)
})

test_that("cv.pcplus is giving correct results III", {
  testy <- rnorm(100)
  testy[7:100] <- testy[7:100] + 50
  testy[12:100] <- testy[12:100] + 20
  testy[90:100] <- testy[90:100] + 20
  testbandwidth <- c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30)), Inf)
  testlambda <- c(10:1 * 10, 10:1) / 100
  testthresh <- 1e-7
  testmaxit <- 1e5
  testsd <- 0.5
  
  object <- cv.pcplus(y = testy, lambda = testlambda, bandwidth = testbandwidth, 
                      sd = testsd, thresh = testthresh, maxit = testmaxit)
  expected <- compareCv(testy, testlambda, testbandwidth, testsd, testthresh, testmaxit)
  expect_equal(object$cvs, expected$cvs, tolerance = 1e-1)
})

test_that("argument y is checked", {
  testy <- rnorm(100)
  
  expect_error(cv.pcplus())
  expect_error(cv.pcplus(y = NA))
  expect_error(cv.pcplus(y = as.numeric(NA)))
  expect_error(cv.pcplus(y = c(testy, "s")))
  expect_error(cv.pcplus(y = c(testy, Inf)))
  expect_error(cv.pcplus(y = c(-Inf, testy)))
  expect_error(cv.pcplus(y = c(NaN, testy)))
})

test_that("argument bandwidth is checked", {
  testy <- rnorm(100)
  testy[7:100] <- testy[7:100] + 50
  testy[12:100] <- testy[12:100] + 20
  testy[90:100] <- testy[90:100] + 20
  
  expect_identical(cv.pcplus(y = testy),
                   cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), Inf)))
  
  expect_error(cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), Inf, "s")))
  expect_error(cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), Inf, NA)))
  expect_error(cv.pcplus(y = testy, bandwidth = c(-0.05, exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), Inf)))
  expect_error(cv.pcplus(y = testy, bandwidth = c(as.numeric(NA), exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), Inf)))
  expect_error(cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), NaN, Inf)))
  
  expect_warning(object <- cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), 0.5)))
  expect_equal(object, cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), Inf)))
  
  expect_warning(object <- cv.pcplus(y = testy, bandwidth = c(exp(seq(log(1.99 / 50 / 2), log(0.25), length.out = 30L)), Inf)))
  expect_equal(object, cv.pcplus(y = testy, bandwidth = c(2.01 / 50 / 2, exp(seq(log(1.99 / 50 / 2), log(0.25), length.out = 30L))[-1], Inf)))
})


test_that("argument lambda is checked", {
  testy <- rnorm(100)
  testy[7:100] <- testy[7:100] + 50
  testy[12:100] <- testy[12:100] + 20
  testy[90:100] <- testy[90:100] + 20
  testbandwidth <- c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30)), Inf)
  testlambda <- c(10:1 * 10, 10:1) / 100
  testthresh <- 1e-7
  testmaxit <- 1e5
  testsd <- 0.5

  expect_error(cv.pcplus(y = testy, lambda = "s"))
  expect_error(cv.pcplus(y = testy, lambda = NA))
  expect_error(cv.pcplus(y = testy, lambda = -0.5))
  expect_error(cv.pcplus(y = testy, lambda = as.numeric(NA)))
  expect_error(cv.pcplus(y = testy, lambda = c(seq(1, 0.05, -0.05), "s")))
  expect_error(cv.pcplus(y = testy, lambda = c(as.numeric(NA), seq(1, 0.05, -0.05))))
  expect_error(cv.pcplus(y = testy, lambda = c(seq(1, 0.5, -0.05), -0.05, seq(0.45, 0.05, -0.05))))
  expect_error(cv.pcplus(y = testy, lambda = c(seq(1, 0.5, -0.05), Inf, seq(0.45, 0.05, -0.05))))
  
  object <- cv.pcplus(y = testy, lambda = 0.05, bandwidth = testbandwidth, 
                      sd = testsd, thresh = testthresh, maxit = testmaxit)
  expected <- compareCv(testy, 0.05, testbandwidth, testsd, testthresh, testmaxit)
  expect_equal(object$cvs, expected$cvs, tolerance = 1e-1)
  
  expect_warning(object <- cv.pcplus(y = testy, lambda = rev(seq(1, 0.05, -0.05))))
  expect_identical(object, cv.pcplus(y = testy, lambda = seq(1, 0.05, -0.05)))
  expect_warning(object <- cv.pcplus(y = testy, lambda = c(seq(1, 0.2, -0.05), 0.05, 0.15, 0.1)))
  expect_identical(object, cv.pcplus(y = testy, lambda = seq(1, 0.05, -0.05)))
})

test_that("argument nbandwidth is checked", {
  testy <- rnorm(100)
  testy[7:100] <- testy[7:100] + 50
  testy[12:100] <- testy[12:100] + 20
  testy[90:100] <- testy[90:100] + 20
  
  expect_error(cv.pcplus(y = testy, nbandwidth = "s"))
  expect_error(cv.pcplus(y = testy, nbandwidth = NA))
  expect_error(cv.pcplus(y = testy, nbandwidth = NaN))
  expect_error(cv.pcplus(y = testy, nbandwidth = Inf))
  expect_error(cv.pcplus(y = testy, nbandwidth = -10))
  expect_error(cv.pcplus(y = testy, nbandwidth = as.numeric(NA)))
  expect_error(cv.pcplus(y = testy, nbandwidth = c(5, 10)))
  
  expect_identical(cv.pcplus(y = testy), cv.pcplus(y = testy, nbandwidth = 30L))
  expect_identical(cv.pcplus(y = testy, nbandwidth = 10), cv.pcplus(y = testy, nbandwidth = 10L))
  
  expect_identical(cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), Inf), nbandwidth = 10L),
                   cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), Inf)))
  expect_identical(cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), Inf), nbandwidth = "s"),
                   cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), Inf)))
  
  expect_identical(cv.pcplus(y = testy),
                   cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), Inf)))
  expect_identical(cv.pcplus(y = testy, nbandwidth = 1L),
                   cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 1L)), Inf)))
  expect_identical(cv.pcplus(y = testy, nbandwidth = 10L),
                   cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 10L)), Inf)))
  expect_identical(cv.pcplus(y = testy, nbandwidth = 20L),
                   cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 20L)), Inf)))
})

test_that("argument nlambda is checked", {
  testy <- rnorm(100)
  testy[7:100] <- testy[7:100] + 50
  testy[12:100] <- testy[12:100] + 20
  testy[90:100] <- testy[90:100] + 20
  
  expect_error(cv.pcplus(y = testy, nlambda = "s"))
  expect_error(cv.pcplus(y = testy, nlambda = NA))
  expect_error(cv.pcplus(y = testy, nlambda = NaN))
  expect_error(cv.pcplus(y = testy, nlambda = Inf))
  expect_error(cv.pcplus(y = testy, nlambda = -10))
  expect_error(cv.pcplus(y = testy, nlambda = as.numeric(NA)))
  expect_error(cv.pcplus(y = testy, nlambda = c(5, 10)))
  
  expect_identical(cv.pcplus(y = testy), cv.pcplus(y = testy, nlambda = 30L))
  expect_identical(cv.pcplus(y = testy, nlambda = 10), cv.pcplus(y = testy, nlambda = 10L))
  
  expect_identical(cv.pcplus(y = testy, lambda = c(10:1 * 10, 10:1) / 100, nlambda = 10L),
                   cv.pcplus(y = testy, lambda = c(10:1 * 10, 10:1) / 100))
  expect_identical(cv.pcplus(y = testy, lambda = c(10:1 * 10, 10:1) / 100, nlambda = "s"),
                   cv.pcplus(y = testy, lambda = c(10:1 * 10, 10:1) / 100))
})

test_that("argument lambda.min.ratio is checked", {
  testy <- rnorm(100)
  testy[7:100] <- testy[7:100] + 50
  testy[12:100] <- testy[12:100] + 20
  testy[90:100] <- testy[90:100] + 20
  
  expect_error(cv.pcplus(y = testy, lambda.min.ratio = "s"))
  expect_error(cv.pcplus(y = testy, lambda.min.ratio = NA))
  expect_error(cv.pcplus(y = testy, lambda.min.ratio = NaN))
  expect_error(cv.pcplus(y = testy, lambda.min.ratio = Inf))
  expect_error(cv.pcplus(y = testy, lambda.min.ratio = -1))
  expect_error(cv.pcplus(y = testy, lambda.min.ratio = 0))
  expect_error(cv.pcplus(y = testy, lambda.min.ratio = 1))
  expect_error(cv.pcplus(y = testy, lambda.min.ratio = 2))
  expect_error(cv.pcplus(y = testy, lambda.min.ratio = as.numeric(NA)))
  expect_error(cv.pcplus(y = testy, lambda.min.ratio = c(0.5, 0.1)))
  
  expect_identical(cv.pcplus(y = testy), cv.pcplus(y = testy, lambda.min.ratio = 0.01))

  expect_identical(cv.pcplus(y = testy, lambda = c(10:1 * 10, 10:1) / 100, lambda.min.ratio = 0.1),
                   cv.pcplus(y = testy, lambda = c(10:1 * 10, 10:1) / 100))
  expect_identical(cv.pcplus(y = testy, lambda = c(10:1 * 10, 10:1) / 100, lambda.min.ratio = "s"),
                   cv.pcplus(y = testy, lambda = c(10:1 * 10, 10:1) / 100))
})

test_that("argument sd is checked", {
  testy <- rnorm(100)
  testy[7:100] <- testy[7:100] + 50
  testy[12:100] <- testy[12:100] + 20
  testy[90:100] <- testy[90:100] + 20
  testlambda <- seq(1, 0.05, -0.05)
  testbandwidth <- 0.05
  testthresh <- 1e-7
  testmaxit <- 1e5
  
  expect_error(cv.pcplus(y = testy, sd = "s"))
  expect_error(cv.pcplus(y = testy, sd = NA))
  expect_error(cv.pcplus(y = testy, sd = -0.05))
  expect_error(cv.pcplus(y = testy, sd = NaN))
  expect_error(cv.pcplus(y = testy, sd = as.numeric(NA)))
  expect_error(cv.pcplus(y = testy, sd = c(0.05, 0.1)))

  testsd <- as.numeric(diff(quantile(diff(testy), c(0.25, 0.75))) / diff(qnorm(c(0.25, 0.75))) / sqrt(2)) 
  expect_identical(cv.pcplus(y = testy),
                   cv.pcplus(y = testy, sd = testsd))
  
  object <- cv.pcplus(y = testy, lambda = testlambda, bandwidth = testbandwidth, 
                      sd = 0.5, thresh = testthresh, maxit = testmaxit)
  expected <- compareCv(testy, testlambda, testbandwidth, 0.5, testthresh, testmaxit)
  expect_equal(object$cvs, expected$cvs, tolerance = 1e-1)
})

test_that("argument thresh is checked", {
  testy <- rnorm(100)
  testy[7:100] <- testy[7:100] + 50
  testy[12:100] <- testy[12:100] + 20
  testy[90:100] <- testy[90:100] + 20
  testbandwidth <- 0.1
  testlambda <- seq(1, 0.05, -0.05)
  testmaxit <- 1e5
  testsd <- 0.5
  
  expect_error(cv.pcplus(y = testy, thresh = "s"))
  expect_error(cv.pcplus(y = testy, thresh = NA))
  expect_error(cv.pcplus(y = testy, thresh = NaN))
  expect_error(cv.pcplus(y = testy, thresh = Inf))
  expect_error(cv.pcplus(y = testy, thresh = -10))
  expect_error(cv.pcplus(y = testy, thresh = 0))
  expect_error(cv.pcplus(y = testy, thresh = as.numeric(NA)))
  expect_error(cv.pcplus(y = testy, thresh = c(0.001, 1e-7)))
  
  expect_identical(cv.pcplus(y = testy),
                   cv.pcplus(y = testy, thresh = 1e-7))
  
  object <- cv.pcplus(y = testy, lambda = testlambda, bandwidth = testbandwidth, 
                      sd = testsd, thresh = 1e-5, maxit = testmaxit)
  expected <- compareCv(testy, testlambda, testbandwidth, testsd, 1e-5, testmaxit)
  expect_equal(object$cvs, expected$cvs, tolerance = 1e-1)
  
  object <- cv.pcplus(y = testy, lambda = testlambda, bandwidth = testbandwidth, 
                      sd = testsd, thresh = 1e-3, maxit = testmaxit)
  expected <- compareCv(testy, testlambda, testbandwidth, testsd, 1e-3, testmaxit)
  expect_equal(object$cvs, expected$cvs, tolerance = 1e-1)
})

test_that("argument maxit is checked", {
  testy <- rnorm(100)
  testy[7:100] <- testy[7:100] + 50
  testy[12:100] <- testy[12:100] + 20
  testy[90:100] <- testy[90:100] + 20
  testbandwidth <- 0.1
  testlambda <- c(0.5, 0.4)
  testthresh <- 1e7
  testsd <- 0.5
  
  expect_error(cv.pcplus(y = testy, maxit = "s"))
  expect_error(cv.pcplus(y = testy, maxit = NA))
  expect_error(cv.pcplus(y = testy, maxit = NaN))
  expect_error(cv.pcplus(y = testy, maxit = Inf))
  expect_error(cv.pcplus(y = testy, maxit = -10))
  expect_error(cv.pcplus(y = testy, maxit = 0))
  expect_error(cv.pcplus(y = testy, maxit = as.numeric(NA)))
  expect_error(cv.pcplus(y = testy, maxit = c(100, 1e4)))

  expect_identical(cv.pcplus(y = testy),
                   cv.pcplus(y = testy, maxit = 1e5L))
  expect_identical(cv.pcplus(y = testy, maxit = 1e4),
                   cv.pcplus(y = testy, maxit = 1e4L))
  
  object <- cv.pcplus(y = testy, lambda = testlambda, bandwidth = testbandwidth, 
                      sd = testsd, thresh = testthresh, maxit = 1e3)
  expected <- compareCv(testy, testlambda, testbandwidth, testsd, testthresh, 1e3)
  expect_equal(object$cvs, expected$cvs, tolerance = 1e-1)
  
  object <- cv.pcplus(y = testy, lambda = testlambda, bandwidth = testbandwidth, 
                      sd = testsd, thresh = testthresh, maxit = 5)
  expected <- compareCv(testy, testlambda, testbandwidth, testsd, testthresh, 5)
  expect_equal(object$cvs, expected$cvs, tolerance = 1e-1)
})

Try the PCpluS package in your browser

Any scripts or data that you put into this service are public.

PCpluS documentation built on April 4, 2025, 2:14 a.m.