tests/testthat/test-gibbs_alg_a.R

test_that("gibbs_alg() estimates the change-points for multiple data sequences and cluster observations accordingly to their change-point structure starting from the true", {

  library(BayesCPclust)

  # Considering the data available at data_a with 5 data sequences with M = 50 evaluation points and two change points, distributed at random between d = 2 clusters
  d = 2
  N = 5
  M = 50
  w = 10
  maxIter = 100

  set.seed(1238)
  sigma2 <- round(extraDistr::rinvgamma(5, 21, 0.05*20),4)

  param1 <- list(list(2, c(1, 19, 34, 51), c(5, 20, 10)),
                 list(2, c(1, 15, 32, 51), c(17, 10, 2)),
                 sigma2)

  data(data_a)

  # initial values for the Gibbs sampler (true values for K and T)
  K_true <- c(param1[[1]][[1]], param1[[2]][[1]])

  Tl_true <- list(param1[[1]][[2]], param1[[2]][[2]])

  # maximum number of change-points
  kstar <- ((M - 1)/w) - 1

  #alpha0 = 1/100, lambda = 2,as = 2, bs = 1000, al = 2, bl = 1000, a = 2, b = 1000,
  res <- gibbs_alg(alpha0 = 1/100, N = 5, w = 10, M = 50,
                   K = K_true,
                   Tl = Tl_true,
                   cluster = data_a[[2]],
                   alpha = list(param1[[1]][[3]], param1[[2]][[3]]),
                   sigma2 = param1[[3]],
                   bs = 1000, as = 2, al = 2, bl = 1000, a = 2, b = 1000, kstar = kstar,
                   lambda = 2, Y = data_a[[1]], d = 2, maxIter = maxIter)

  # Compute mode of the posterior samples

  # thinning parameter
  startpoint_sampling = 2

  index <- seq(from = round((maxIter/2)+1), to = maxIter, by = startpoint_sampling)

  res1 <- list(Ck = res$Ck[index], Nclusters = res$Nclusters[index], lambda = res$lambda[index],
               alpha0 = res$alpha0[index], clusters = res$clusters[index,], sigma2 = res$sigma2[index,],
               Tl = res$Tl[index], alphal = res$alphal[index])

  # Posterior means and modes for all parameters except alpha and T

  out2 <- list(Nclusters = Mode(res1$Nclusters),
               lambda = mean(res1$lambda), alpha0 = mean(res1$alpha0),
               clusters = apply(res1$clusters, 2, Mode), sigma2 = apply(res1$sigma2, 2, mean))

  # Posterior means for alpha and T

  Ckres <- do.call("rbind", lapply(res1$Ck, "[", seq(1:max(sapply(res1$Ck, length)))))

  out2$Ck <- apply(Ckres[,sort(unique(out2$clusters))], 2, Mode)

  # merging selected iteration for Tl and alpha (considering we can have different sizes in each iteration)

  Tlist_tab <- lapply(1:length(res1$Tl),
                      function(x){do.call("list",
                                          lapply(res1$Tl[[x]][sort(unique(res1$clusters[x,]))], "[",
                                                 seq(1:max(sapply(res1$Tl[[x]][sort(unique(res1$clusters[x,]))],
                                                                  length)))))})



  # creating list for each cluster, considering the most frequent number of clusters
  Tlist <- lapply(1:out2$Nclusters, function(x){lapply(Tlist_tab , "[[", x)})


  Tvalues <- lapply(1:length(Tlist),
                    function(x){do.call("rbind",
                                        lapply(Tlist[[x]], "[",
                                               seq(1:max(sapply(Tlist[[x]],
                                                                length)))))})

  a_tab <- lapply(1:length(res1$alphal),
                  function(x){do.call("list",
                                      lapply(res1$alphal[[x]][sort(unique(res1$clusters[x,]))], "[",
                                             seq(1:max(sapply(res1$alphal[[x]][sort(unique(res1$clusters[x,]))],
                                                              length)))))})


  alist <- lapply(1:out2$Nclusters, function(x){lapply(a_tab, "[[", x)})


  avalues <- lapply(1:length(alist),
                    function(x){do.call("rbind",
                                        lapply(alist[[x]], "[", seq(1:max(sapply(alist[[x]],
                                                                                 length)))))})


  res_alpha <- list()
  res_Tl <- list()
  for(i in 1:out2$Nclusters){
    ck <- out2$Ck[i]
    if(ck == 0){
      alphapos <- mean(avalues[[i]])
      Tlpos <- Mode(Tvalues[[i]])
    } else{
      Tl <- Tvalues[[i]][,-c(1,ncol(Tvalues[[i]]))]
      alphapos <- apply(avalues[[i]][,1:(ck+1)], 2, mean)
      Tlpos <- apply(Tl[,1:ck], 2, Mode)
    }
    res_alpha[[i]] <- alphapos
    res_Tl[[i]] <- Tlpos
  }

  out2$alpha <- res_alpha
  out2$Tl  <- res_Tl

  # checking if it the number of cluster estimated is equal to the number generated
  expect_equal(out2$Nclusters, d)

  # checking if it the number of change points per cluster estimated is equal to the number generated
  expect_equal(out2$Ck, K_true)

  # checking if it the position of the change points per cluster estimated is equal to the true values
  expect_equal(out2$Tl, list(Tl_true[[1]][c(2,3)], Tl_true[[2]][c(2,3)]))

  # checking if it the position of the mean level between change change points per cluster are approximately equal to the true values
  expect_equal(lapply(out2$alpha, function(x){round(x)}), list(param1[[1]][[3]], param1[[2]][[3]]))
})

Try the BayesCPclust package in your browser

Any scripts or data that you put into this service are public.

BayesCPclust documentation built on April 4, 2025, 5:19 a.m.