R/bcluster.R
In cata: Analysis of Check-All-that-Apply (CATA) Data

Documented in bcluster bcluster.h bcluster.n inspect

#' Wrapper function for b-cluster analysis
#'
#' By default, \code{bcluster} calls a function to perform b-cluster analysis 
#' by a non-hierarchical iterative ascent algorithm, then inspects results if 
#' there are multiple runs. 
#' 
#' @name bcluster
#' @param X three-way array with \eqn{I} assessors, \eqn{J} products, 
#' \eqn{M} attributes where CATA data have values \code{0} (not checked) and 
#' \code{1} (checked)
#' @param inspect default (\code{TRUE}) calls the \code{\link[cata]{inspect}}
#' function to evaluate all solutions (when \code{runs>1})
#' @param inspect.plot default (\code{TRUE}) plots results from the 
#' \code{\link[cata]{inspect}} function 
#' @param algorithm default is \code{n} for non-hierarchical; \code{h} for 
#' hierarchical 
#' @param measure default is \code{b} for the \code{b}-measure; \code{Q} for 
#' Cochran's Q test 
#' @param G number of clusters (required for non-hierarchical algorithm)
#' @param M initial cluster memberships
#' @param max.iter maximum number of iteration allowed (default \code{500})
#' @param X.input available only for non-hierarchical algorithm; its value is
#' either \code{"data"} (default) or \code{"bc"} if \code{X} is
#' obtained from the function \code{\link[cata]{barray}} 
#' @param tol non-hierarchical algorithm stops if variance over 5 iterations is
#' less than \code{tol} (default: \code{exp(-32)})
#' @param runs number of runs (defaults to \code{1})
#' @param seed for reproducibility (default is \code{2021})
#' @return list with elements:
#' \itemize{
#' \item{\code{runs} : b-cluster analysis results from \code{\link[cata]{bcluster.n}} 
#' or \code{\link[cata]{bcluster.h}} (in a list if \code{runs>1})}
#' \item{\code{inspect} : result from \code{\link[cata]{inspect}} (the plot from 
#' this function is rendered if \code{inspect.plot} is \code{TRUE})}}
#' @export
#' @encoding UTF-8
#' @author J.C. Castura
#' @references Castura, J.C., Meyners, M., Varela, P., & Næs, T. (2022). 
#' Clustering consumers based on product discrimination in check-all-that-apply 
#' (CATA) data. \emph{Food Quality and Preference}, 104564. 
#' \doi{10.1016/j.foodqual.2022.104564}.
#' @examples
#' # b-cluster analysis on the first 8 consumers and the first 5 attributes
#' (b1 <- bcluster(bread$cata[1:8,,1:5], G=2, seed = 123))
#' # Since the seed is the same, the result will be identical to
#' # (b2 <- bcluster.n(bread$cata[1:8,,1:5], G=2, seed = 123))
#' b3 <- bcluster(bread$cata[1:8,,1:5], G=2, runs = 5, seed = 123)
bcluster <- function(X, inspect = TRUE, inspect.plot = TRUE,
                     algorithm = "n", measure = "b",
                     G = NULL, M = NULL, max.iter = 500, X.input = "data", 
                     tol = exp(-32), runs = 1, seed = 2021){
  if(is.null(X)) return(print("X must be an array"))
  if(algorithm %in% c("h", "1")){
    if(X.input %in% "bc"){
      return(invisible("X.input 'bc' not available for hierarchical algorithm"))
    }
    out <- bcluster.h(X = X, measure = measure, runs = runs, 
                      seed = seed)
  }
  if(algorithm %in% c("n", "2")){
    if(is.null(G)){
      return(print("G must be specified"))
    } 
    if(length(G) > 1){
      return(print("G cannot be longer than one"))
    }
    out <- bcluster.n(X = X, G = G, M = M, measure = measure, 
                      max.iter = max.iter, X.input = X.input,
                      tol = tol, runs = runs,
                      seed = seed)
  }
  out <- list(runs = out)
  if((runs > 1) & inspect){
    if(algorithm %in% "h"){
      if(is.null(G)){
        print("Number of groups (G) not provided. Defaulting to G=2 groups.")
        G <- 2
      }
    }
    print(out$inspect <- inspect(out$runs, G = G, inspect.plot = inspect.plot))
  }
  invisible(out)
}

#' b-cluster analysis by hierarchical agglomerative strategy
#'
#' Perform b-clustering using the hierarchical agglomerative clustering 
#' strategy. 
#' @name bcluster.h
#' @param X three-way array; the \eqn{I \times J \times M} array has \eqn{I}
#' assessors, \eqn{J} products, \eqn{M} attributes where CATA data have values 
#' \code{0} (not checked) and \code{1} (checked)
#' @param measure currently only \code{b} (the \code{b}-measure) is implemented 
#' @param runs number of runs (defaults to \code{1}; use a higher number of
#' runs for a real application)
#' @param seed for reproducibility (default is \code{2021})
#' @return An object of class \code{hclust} from hierarchical b-cluster 
#' analysis results (a list of such objects if \code{runs>1}), where each \code{hclust} 
#' object has the structure described in \code{\link[stats]{hclust}} as well as 
#' the item \code{retainedB} (a vector indicating the retained sensory 
#' differentiation at each iteration (merger)).
#' @export
#' @encoding UTF-8
#' @author J.C. Castura
#' @references Castura, J.C., Meyners, M., Varela, P., & Næs, T. (2022). 
#' Clustering consumers based on product discrimination in check-all-that-apply 
#' (CATA) data. \emph{Food Quality and Preference}, 104564. 
#' \doi{10.1016/j.foodqual.2022.104564}.
#' @examples
#' # hierarchical b-cluster analysis on first 8 consumers and first 5 attributes
#' b <- bcluster.h(bread$cata[1:8,,1:5])
#' 
#' plot(as.dendrogram(b), 
#'   main = "Hierarchical b-cluster analysis", 
#'   sub = "8 bread consumers on 5 attributes")
bcluster.h <- function(X, measure = "b", runs = 1, seed = 2021){
  ## DEBUG
  # X = bread$cata[1:14,,1]
  # measure = "b"
  # runs = 1
  # seed = 2021
  verbose = FALSE
  bcluster.call <- match.call()
  if(measure != "b"){
    return(print("Only the b-measure is implemented"))
  } 
  ### functions
  # get.gmem gets group members
  #  g.indx is the index for the group
  #  curr.df current allocations in the first 2 columns
  get.gmem <- function(g.indx, curr.df){
    return(curr.df$start.g[curr.df$curr.g == g.indx])
  }
  # init.candidates
  #   cnd1.indx : all members IDs of group 1
  #   cnd2.indx : all members IDs of group 2
  #   X.bc      : bc array
  #   b         : vector of b-measures
  #   sensDiff  : mN columns with 1 if differentiated and 0 if not
  init.candidates <- function(cnd1.indx, cnd2.indx, X.bc, b, sensDiff, 
                              X.bc.dim = NULL){
    # DEBUG
    # cnd1.indx = df.cnd$cnd1
    # cnd2.indx = df.cnd$cnd2
    # X.bc = X.bc
    # b = df.curr$b
    # sensDiff = df.curr[,-c(1:3)]
    # X.bc.dim = X.bc.dim
    if(!is.null(X.bc.dim[1])){
      if(!all.equal(dim(X.bc), X.bc.dim)){
        X.bc <- array(X.bc, X.bc.dim)
      }  
    }
    new.b <- getb(X.bc[c(cnd1.indx, cnd2.indx),,1,, drop=FALSE], 
                  X.bc[c(cnd1.indx, cnd2.indx),,2,, drop=FALSE],
                  oneI = ifelse(length(c(cnd1.indx, cnd2.indx))==1, TRUE, FALSE),
                  oneM = ifelse(X.bc.dim[length(X.bc.dim)]==1, TRUE, FALSE))
    if(X.bc.dim[length(X.bc.dim)]==1){
      sensDiff <- matrix(sensDiff, ncol=1)
      sensDiff.o <- sum(colSums(matrix(sensDiff[c(cnd1.indx, cnd2.indx), ], 
                                       ncol=1))>1)
    } else {
      sensDiff.o <- sum(colSums(sensDiff[c(cnd1.indx, cnd2.indx), ])>1)
    }
    return(c(new.b, new.b - b[cnd1.indx] - b[cnd2.indx],
             sensDiff.o))
  }
  # check if id being merged is a singleton (or not)
  isSingleton <- function(id, g.curr){
    return(sum(g.curr$curr.g %in% id)==1)
  }
  # update C and # differentiating attributes
  # update.candidates
  #   cnd1.indx : keys corresponding to all members of group 1
  #   cnd2.indx : keys corresponding to all members of group 2
  #   X.bc      : bc array
  #   b         : vector of b-measures
  #   sensDiff  : mN columns with 1 if differentiated and 0 if not
  update.candidates <- function(cnd1.indx, cnd2.indx, X.bc, 
                                group.keys, b, sensDiff, X.bc.dim = NULL){
    if(!is.null(X.bc.dim[1])){
      if(!all.equal(dim(X.bc), X.bc.dim)){
        X.bc <- array(X.bc, X.bc.dim)
      }  
    }
    g1.keys <- group.keys$start.g[group.keys$curr.g == cnd1.indx]
    g2.keys <- group.keys$start.g[group.keys$curr.g == cnd2.indx]
    new.b <- getb(X.bc[c(g1.keys, g2.keys),,1,, drop=FALSE], 
                  X.bc[c(g1.keys, g2.keys),,2,, drop=FALSE],
                  oneI = ifelse(length(c(g1.keys, g2.keys))==1, TRUE, FALSE),
                  oneM = ifelse(X.bc.dim[length(X.bc.dim)]==1, TRUE, FALSE))
    if(X.bc.dim[length(X.bc.dim)]==1){
      sensDiff <- matrix(sensDiff, ncol=1)
      sensDiff.o <- sum(colSums(matrix(sensDiff[c(group.keys$curr.g 
                                                  %in% c(cnd1.indx, cnd2.indx)), ],
                                       ncol=1))>1)
    } else {
      sensDiff.o <- sum(colSums(sensDiff[c(group.keys$curr.g 
                                           %in% c(cnd1.indx, cnd2.indx)), ])>1)
    }
    return(c(new.b, 
             new.b - b[group.keys$start.g == cnd1.indx] - 
               b[group.keys$start.g == cnd2.indx],
             sensDiff.o))
  }
  # create hclust merge matrix from M, which is my progress.track object
  write.merge <- function(M){
    out <- M
    last.use <- rep(NA, nrow(M))
    for(i in 1:nrow(M)){
      if(!is.na(last.use[abs(M[i,1])])){
        out[i,1] <- last.use[M[i,1]]
      }
      if(!is.na(last.use[abs(M[i,2])])){
        out[i,2] <- last.use[M[i,2]]
      }
      last.use[abs(M[i,1])] <- i
      last.use[abs(M[i,2])] <- i
    }
    out <- as.matrix(out)
    colnames(out) <- NULL
    return(out)
  }
  ## end of functions
  
  nI <- dim(X)[1]
  nJ <- dim(X)[2]
  if (length(dim(X)) == 2) {
    X <- array(X, c(dim(X)[1], dim(X)[2], 1), dimnames = list(dimnames(X)[[1]], 
                                                              dimnames(X)[[2]], "data"))
  }
  nM <- dim(X)[3]
  X.bc <- barray(X)
  if (length(dim(X.bc)) == 3) {
    X.bc <- array(X.bc, c(dim(X.bc)[1], dim(X.bc)[2], 2, 
                          1), dimnames = list(dimnames(X.bc)[[1]], dimnames(X.bc)[[2]], 
                                              letters[2:3], "data"))
  }
  nJJ <- dim(X.bc)[2]
  
  if(is.null(dimnames(X.bc)[[1]])) dimnames(X.bc)[[1]] <- 1:nI
  if(is.null(dimnames(X.bc)[[2]])) dimnames(X.bc)[[2]] <- 1:nJJ
  if(is.null(dimnames(X.bc)[[3]])) dimnames(X.bc)[[2]] <- c("n10", "n01")
  if(is.null(dimnames(X.bc)[[4]])) dimnames(X.bc)[[4]] <- 1:nM
  
  X.bc.dim <- dim(X.bc)
  
  progress.track <- data.frame(iter = 1:(nI-1), 
                               cons1 = NA, cons2 = NA, 
                               isSingleton1 = NA, isSingleton2 = NA,
                               C = NA,
                               n.tiebreak.sensDiff = NA, 
                               n.tiebreak.rand = NA)
  # df.curr
  #   $start.g   consumer start group indx
  #   $curr.g    current group indx
  #   $b         sensory differentiation of this group (NA if merged)
  #   $sensDif   an nM-column matrix for current group indicating that 1+ consumer 
  #              differentiates products on this attribute (NA if merged)
  df.curr <- data.frame(start.g = 1:nI, 
                        curr.g = 1:nI,
                        b = rowSums(abs(X.bc[,,1,]-X.bc[,,2,])),
                        sensDiff = ((apply(X.bc, c(1,4), sum) %% nJJ != 0)*1))
  totB <- sum(df.curr$b)
  
  # gMat
  # $cnd1    candidate group 1 for possible merger       
  # $cnd2    candidate group 2 for possible merger       
  # $C       C, sensory differentiation lost by merging candidate groups       
  # $sensDif number of attributes for which 1+ consumer differentiates products      
  df.cnd <- as.data.frame(cbind(t(utils::combn(nI, 2)), 
                                matrix(NA, ncol =3, nrow=nI*(nI-1)/2)))
  colnames(df.cnd) <- c("cnd1", "cnd2", "b", "C", "sensDif")
  
  df.cnd[, 3:5] <- t(mapply(init.candidates, 
                            cnd1.indx = df.cnd$cnd1, cnd2.indx = df.cnd$cnd2, 
                            MoreArgs = list(X.bc = X.bc, b = df.curr$b, 
                                            sensDiff = df.curr[,-c(1:3)],
                                            X.bc.dim = X.bc.dim)))
  if(runs>1){
    # cache calculated start values for future runs
    cache <- list(progress.track = progress.track,
                  df.cnd = df.cnd,
                  df.curr = df.curr)
  }
  hclust.list <- list()
  if(verbose){
    track.list <- list()
  }
  for(this.run in 1:runs){
    set.seed(seed + this.run)
    if(this.run > 1){
      # restore from cache
      progress.track <- cache$progress.track
      df.cnd <- cache$df.cnd
      df.curr <- cache$df.curr
    }
    # track output
    hclust.list[[this.run]] <- list()
    if(verbose){
      hclust.list[[this.run]]$track.list <-
        # track.list[[this.run]] <- list()
        # track.list[[this.run]][[1]] <- 
        list(df.cnd = df.cnd, df.curr = df.curr)
      hclust.list[[this.run]]$track.list.by.iter <- list()
    }
    for(iter in 1:(nI-1)){
      if(verbose){
        # Write state to track.list  
        hclust.list[[this.run]]$track.list.by.iter[[iter+1]] <- 
          # track.list[[this.run]][[iter+1]] <- 
          list(df.cnd = df.cnd, 
               df.curr = df.curr)
      }
      # Find groups that the maximize C
      best.indx <- which(df.cnd$C == max(df.cnd$C))
      if(length(best.indx)>1){
        # Tie-breaking required
        # 1. Maximize # attributes with sensory differentiation in both groups
        best.indx2 <- best.indx[which(df.cnd$sensDif[best.indx] == 
                                        max(df.cnd$sensDif[best.indx]))]
        # 2. Still ties to select at random
        g.indx <- as.numeric(sample(as.character(best.indx2), 1))
      } else {
        g.indx <- best.indx2 <- best.indx
      }
      # Update progress track
      progress.track[iter,] <- 
        c(iter, 
          df.cnd$cnd1[g.indx], df.cnd$cnd2[g.indx],
          isSingleton(df.cnd$cnd1[g.indx], df.curr[,1:2]),
          isSingleton(df.cnd$cnd2[g.indx], df.curr[,1:2]),
          df.cnd$C[g.indx], length(best.indx)-1, length(best.indx2)-1)
      # Post-merge updates
      # Update the current group of second group (progress.track$cons2[iter])
      df.curr$curr.g[which(df.curr$curr.g == progress.track$cons2[iter])] <-
        progress.track$cons1[iter]
      # Update the b-measure of new group
      df.curr$b[which(df.curr$curr.g == progress.track$cons1[iter])] <-
        df.cnd$b[g.indx]
      # Update number of attributes differentiated by new group
      df.curr[which(df.curr$start.g == 
                      progress.track$cons1[iter]), -c(1:3)] <-
        ifelse(nM > 1, (colSums(df.curr[
          which(df.curr$curr.g %in% 
                  c(progress.track$cons1[iter],
                    progress.track$cons2[iter])), -c(1:3)])>0)*1,
          (colSums(matrix(df.curr[
            which(df.curr$curr.g %in% 
                    c(progress.track$cons1[iter],
                      progress.track$cons2[iter])), -c(1:3)], ncol=nM))>0)*1)
      # Remove rows for second group from df.cnd
      df.cnd <- df.cnd[-c(which(df.cnd$cnd1 == progress.track$cons2[iter]),
                          which(df.cnd$cnd2 == progress.track$cons2[iter])),]
      # Update comparisons involving the new merged group
      update.indx <- sort(c(which(df.cnd$cnd1 == progress.track$cons1[iter]),
                            which(df.cnd$cnd2 == progress.track$cons1[iter])))
      df.cnd[update.indx, -c(1:2)] <-
        t(mapply(update.candidates, 
                 cnd1.indx = df.cnd$cnd1[update.indx],  
                 cnd2.indx = df.cnd$cnd2[update.indx],
                 MoreArgs = 
                   list(X.bc = X.bc, 
                        group.keys = df.curr[,1:2],
                        b = df.curr$b,
                        sensDiff = df.curr[, -c(1:3)],
                        X.bc.dim = X.bc.dim)))  
    }
    # All iterations complete so create the hclust object
    hc.obj <- structure(list(
      merge = write.merge(
        cbind(progress.track$cons1*c(1,-1)[progress.track$isSingleton1+1],
              progress.track$cons2*c(1,-1)[progress.track$isSingleton2+1])),
      height = -1 * progress.track$C,
      order = progress.track$iter,
      labels = as.character(dimnames(X.bc)[[1]]), 
      retainedB = totB + cumsum(progress.track$C),
      method = paste0("b-cluster analysis (", measure, "-measure)"), 
      call = bcluster.call, 
      seed = seed + this.run,
      dist.method = measure), 
      class = "hclust")
    hc.obj$order <- unname(stats::order.dendrogram(stats::as.dendrogram(hc.obj)))
    # if(!(stats::.validity.hclust(hc.obj))){
    #   print("Dendrogram failed")
    # }
    class(hc.obj) <- "hclust"
    hclust.list[[this.run]] <- hc.obj
    if(verbose){
      # Write state to track.list  
      hclust.list[[this.run]]$track.list.by.iter[[
        length(hclust.list[[this.run]]$track.list.by.iter)]]$progress.track <- 
        #   track.list[[this.run]][[length(track.list[[this.run]])]]$progress.track <- 
        progress.track
      # name the iterations
      #names(track.list[[this.run]]) <- c("init", paste0("iter", iter))
    }
  }
  names(hclust.list) <- paste0("run", 1:runs)
  # if(verbose){
  #   # names(track.list) <- paste0("run", 1:runs)
  #   out <- list(hclust.list = hclust.list,
  #               track.list = track.list)
  # } else {
  if(runs > 1){
    out <- hclust.list
  } else {
    out <- hclust.list[[1]]
  }
  # }
  invisible(out)
}

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# unexported functions needed for bcluster.n()
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# for getb function in C++
# @useDynLib cata, .registration = TRUE
# @importFrom Rcpp evalCpp

# calculate Cochran's Q test statistic
.getQ <- function(X){
  if(length(dim(X))<3){
    #out <- cochranQ(X, quiet = TRUE)
    return(cochranQ(X, quiet = TRUE))
  } else {
    #out <- sum(apply(X, 3, cochranQ, quiet = TRUE), na.rm=TRUE)
    return(sum(apply(X, 3, cochranQ, quiet = TRUE), na.rm=TRUE))
  }
  #return(out)
}

# .getCurrent get sensory differentiation retained based on current clusters
.getCurrent <- function(M, X = NULL, 
                        G = length(unique(M)), 
                        X.bc = NULL, X.bc.dim = NULL, measure = "b"){
  current <- rep(NA, G)
  if(measure == "b"){
    if(!all.equal(dim(X.bc), X.bc.dim)){
      X.bc <- array(X.bc, X.bc.dim)
    }  
  } 
  oneM <- (X.bc.dim[length(X.bc.dim)] == 1)
  b.bygroup <- function(g, M, oneM){
    g.mem <- which(M==g)
    return(getb(X.bc[g.mem,,1,, drop=FALSE], 
                X.bc[g.mem,,2,, drop=FALSE],
                oneI = (length(g.mem)==1), 
                oneM = oneM))
  }
  if(measure == "b"){
    return(mapply(b.bygroup, 1:G, MoreArgs = list(M=M, oneM=oneM)))
  }
  if(measure == "Q"){
    for(g in 1:G){
      g.mem <- which(M==g)
      current[g] <- .getQ(X[g.mem,, ,drop=FALSE])
    }
  } 
}

# .getCandidateMatrix is used in .initCandidate() to evaluate 
# b-measure of cluster g where 'assessor' is either 
# * removed (new.g is NA) or 
# * added (to cluster number 'new.g')
.getCandidateMatrix <- function(g, new.g, assessor, M=M, 
                                X.bc=X.bc, X.bc.dim=X.bc.dim){
  this.M <- M
  this.M[assessor] <- new.g
  g.mem <- which(this.M==g)
  return(
    getb(X.bc[g.mem,,1,, drop=FALSE],
         X.bc[g.mem,,2,, drop=FALSE],
         oneI = (length(g.mem)==1),
         oneM = (X.bc.dim[length(X.bc.dim)]==1))
  )
}

# .initCandidate calculates the "calculated values matrix"
.initCandidate <- function(M, X = NULL, G = length(unique(M)), 
                           X.bc = NULL, X.bc.dim = NULL, measure = "b"){
  if(measure %in% "b"){
    # columns: current group, potential group, change in b
    indxGrid <- cbind(curr.g = rep(M, times = G), # 1 
                      change.curr.g = rep(1:G, each = length(M)), # 2
                      new.g = rep(1:G, each = length(M)), # 3
                      assessor = 1:length(M), # 4
                      b = NA) # 5
    # dont' need to calculate if the group doesn't change
    indxGrid[which(indxGrid[,1]==indxGrid[,2]), 3] <- NA
    return(
      matrix(mapply(.getCandidateMatrix, 
                    g=indxGrid[,2], 
                    new.g=indxGrid[,3],
                    assessor=indxGrid[,4],
                    MoreArgs = list(M=M, X.bc=X.bc, X.bc.dim=X.bc.dim)),
             nrow=length(M), ncol = G)
      )
  } else {
    for(i in seq_along(M)){
      this.M <- M
      curr.g <- M[i]
      for(g in 1:G){
        # drop i from g if in the group, add i to g if not in the group
        this.M[i] <- ifelse(g == curr.g, NA, g)
        if(measure == "Q"){
          out[i,g] <- .getQ(X[which(this.M==g),,,drop=FALSE])
        }
      }
    }
    return(out)
  }
}

# .updateCandidate updates the "calculated values matrix" 
.updateCandidate <- function(M, X, candidate, update.i, update.g, 
                             G=length(unique(M)), 
                             X.bc = NULL, X.bc.dim = NULL, measure = "b"){
  if(!all.equal(dim(candidate), c(length(M), G))) return(print("error"))
  if(measure == "b"){
    if(G <= 2){
      return(
        .initCandidate(M, G = length(unique(M)), 
                       X.bc = X.bc, X.bc.dim = NULL, measure = "b")
      )
    } else {
      out <- candidate
      
      # update groups in update.g (all consumers)
      for(g in update.g){
        for(i in seq_along(M)){
          this.M <- M
          curr.g <- M[i]
          this.M[i] <- ifelse(g == curr.g, NA, g) 
          g.mem <- which(this.M==g)
          out[i,g] <- getb(X.bc[g.mem,,1,, drop=FALSE], 
                           X.bc[g.mem,,2,, drop=FALSE], 
                           oneI = (length(g.mem)==1),
                           oneM = (X.bc.dim[length(X.bc.dim)]==1)) 
        }
      }
      # update consumers in update.i (groups in update.g already done)
      for(i in update.i){
        for(g in 1:G){
          if(!(g %in% update.g)){
            this.M <- M
            curr.g <- M[i]
            this.M[i] <- ifelse(g == curr.g, NA, g) 
            g.mem <- which(this.M==g)
            out[i,g] <- getb(X.bc[g.mem,,1,, drop=FALSE], 
                             X.bc[g.mem,,2,, drop=FALSE], 
                             oneI = (length(g.mem)==1),
                             oneM = (X.bc.dim[length(X.bc.dim)]==1)) 
          }
        }
      }
      # for(i in seq_along(M)){
      #   this.M <- M
      #   curr.g <- M[i]
      #   for(g in 1:G){
      #     if((i %in% update.i) || (g %in% update.g)){
      #       # drop i from g if in the group, add i to g if not in the group
      #       this.M[i] <- ifelse(g == curr.g, NA, g) 
      #       g.mem <- which(this.M==g)
      #       out[i,g] <- getb(X.bc[g.mem,,1,], 
      #                          X.bc[g.mem,,2,], 
      #                          oneI = (length(g.mem)==1),
      #                          oneM = (X.bc.dim[length(X.bc.dim)]==1)) 
      #     }
      #   }
      # }
      return(out)
    }
  } else {
    out <- candidate
    for(i in seq_along(M)){
      this.M <- M
      curr.g <- M[i]
      for(g in 1:G){
        if((i %in% update.i) || (g %in% update.g)){
          # drop i from g if in the group, add i to g if not in the group
          this.M[i] <- ifelse(g == curr.g, NA, g) 
          out[i,g] <- .getQ(X[which(this.M==g),, ,drop=FALSE])
        }
      }
    }
    return(out)
  }
}

# calculate the gain matrix
.getGainMatrix <- function(g, new.g, assessor, M=M, X.bc=X.bc, 
                           X.bc.dim=X.bc.dim){
  this.M <- M
  this.M[assessor] <- new.g
  g.mem <- which(this.M==g)
  return(
    getb(X.bc[g.mem,,1,, drop=FALSE], 
         X.bc[g.mem,,2,, drop=FALSE],
         oneI = (length(g.mem)==1),
         oneM = (X.bc.dim[length(X.bc.dim)]==1))
  )
}

# helper function for .getGainMat
.thisgain <- function(i, g, current = current, candidate = candidate, M = M){
  if(M[i] == g){
    return(NA)
  } else {
    return(sum(candidate[i, c(g, M[i])]) - sum(current[c(g, M[i])]))
  } 
}

# .getGainMat calculates the update (gain) matrix U
.getGainMat <- function(current, candidate, M, G=length(unique(M))){
  tmp <- cbind(assessor = rep(seq_along(M), times = G), 
               group = rep(1:G, each = length(M)),
               gain = NA)
  return(matrix(mapply(.thisgain, 
                       i = rep(seq_along(M), times = G), 
                       g = rep(1:G, each = length(M)), 
                       MoreArgs = list(current = current,
                                       candidate = candidate,
                                       M = M)), nrow = length(M)))
}

.idSwap <- function(current, candidate, M, G=length(unique(M))){
  gainMat <- .getGainMat(current, candidate, M, G=length(unique(M)))
  best.change <- max(gainMat, na.rm = TRUE)
  if(best.change<0){
    return(list(i=0, g=0))
  } else {
    best.indx <- which(gainMat==best.change, arr.ind = TRUE)
    if(dim(best.indx)[1] == 1){
      return(list(i=best.indx[1], g=best.indx[2]))  
    } else {
      # if there are multiple, pick the best one
      best.indx.row <- sample(dim(best.indx)[1], 1)
      return(list(i=best.indx[best.indx.row, 1], 
                  g=best.indx[best.indx.row, 2]))  
    }
  }
}

# get J based on the number of paired comparisons
.findJ <- function(njj){
  # we know that j is larger than 1 and less than njj
  j.guess <- 1
  njj.sum <- 0
  continue <- TRUE
  while(continue){
    if(njj.sum == njj){
      continue <- FALSE
    } else {
      njj.sum <- njj.sum + j.guess
      j.guess <- j.guess + 1
    }
    if(njj.sum > njj){
      j.guess <- NA
      continue <- FALSE
    }
  }
  return(j.guess)
}

# get M for a single run
.getM <- function(nI, G, seed){
  set.seed(seed)
  return(c(sample(G, G, replace = FALSE), sample(G, nI-G, replace = TRUE)))
}

# get Ms for multiple runs
.startMs <- function(nI, G, Seeds, M = NULL, runs = NULL){
  if(!is.null(M)){
    return(cbind(M, mapply(.getM, rep(nI, runs-1), rep(G, runs-1), 
                           (Seeds+1)[-1])))
  } else {
    return(mapply(.getM, rep(nI, runs), rep(G, runs), Seeds+1))
  }
}

# perform b-cluster analysis with swapping
.bclustn <- function(M, X.bc, X.bc.dim, measure = "b", 
                     max.iter = 100, G = 1, tol = 1e-8){
  current <- .getCurrent(M = M, G = G, X.bc = X.bc, X.bc.dim = X.bc.dim, 
                         measure=measure)
  candidate <- .initCandidate(M, G = G, X = NULL, X.bc = X.bc, X.bc.dim = X.bc.dim, 
                              measure = measure)
  it <- 0
  continue <- TRUE
  len.Qual <- 1
  Qual <- c(sum(current), rep(NA, max.iter))
  
  # consider converting the while-loop to a for-loop (with a break)?
  while (continue){
    this.swap <- .idSwap(current, candidate, M)
    if(this.swap$i > 0){
      old.g <- M[this.swap$i]
      # Update group membership vector
      M[this.swap$i] <- this.swap$g
      # Update candidate matrix
      candidate <- .updateCandidate(M, X = NULL, candidate, 
                                    update.i = this.swap$i, 
                                    update.g = c(old.g, this.swap$g), 
                                    G=G, X.bc = X.bc, X.bc.dim = X.bc.dim,
                                    measure = measure)
      # Recalculate current
      current <- .getCurrent(M = M, X.bc = X.bc, 
                             X.bc.dim = X.bc.dim, measure=measure)
      # Update quality measure
      Qual[it+2] <- sum(current) 
      len.Qual <- len.Qual + 1
    } else {
      continue <- FALSE # no further swaps improve the solution
      break
    }
    if (it >= max.iter || (len.Qual > 6 && Qual[len.Qual] - Qual[len.Qual-5] < tol)) {
      continue <- FALSE
      break
    } else {
      it <- it + 1
    }
  }
  o <- 
    list(cluster = .idclusters(M), 
         totalB = sum(abs(X.bc[,,1,]-X.bc[,,2,])),
         retainedB = sum(current), 
         progression = Qual[1:len.Qual], iter = it,
         finish = it<=max.iter)
  class(o) <- "bclust.n"
  return(o)
}

#' b-cluster analysis by non-hierarchical iterative ascent clustering
#' strategy
#' 
#' Non-hierarchical b-cluster analysis transfers assessors iteratively to
#' reach a local maximum in sensory differentiation retained.
#' @name bcluster.n
#' @param X CATA data organized in a three-way array (assessors, products, 
#' attributes)
#' @param G number of clusters (required for non-hierarchical algorithm)
#' @param M initial cluster memberships (default: \code{NULL}), but can be a vector
#' (one run) or a matrix (consumers in rows; runs in columns)
#' @param measure \code{b} (default) for the \code{b}-measure is implemented
#' @param max.iter maximum number of iteration allowed (default \code{500})
#' @param runs number of runs (defaults to \code{1})
#' @param X.input either \code{"data"} (default) or \code{"bc"} if \code{X} is
#' obtained from the function \code{\link[cata]{barray}} 
#' @param tol algorithm stops if variance over 5 iterations is less than 
#' \code{tol} (default: \code{exp(-32)})
#' @param seed for reproducibility (default is \code{2021})
#' @return An object of class \code{bclust.n} (or a list of such objects 
#' if \code{runs>1}), where each such object has the following components: 
#' \itemize{
#' \item{\code{cluster} : vector of the final cluster memberships}
#' \item{\code{totalB} : value of the total sensory differentiation in data set}
#' \item{\code{retainedB} : value of sensory differentiation retained in b-cluster
#' analysis solution}
#' \item{\code{progression} : vector of sensory differentiation retained in each 
#' iteration}
#' \item{\code{iter} : number of iterations completed}
#' \item{\code{finished} : boolean indicates whether the algorithm converged 
#' before \code{max.iter}}}
#' @export
#' @encoding UTF-8
#' @author J.C. Castura
#' @references Castura, J.C., Meyners, M., Varela, P., & Næs, T. (2022). 
#' Clustering consumers based on product discrimination in check-all-that-apply 
#' (CATA) data. \emph{Food Quality and Preference}, 104564. 
#' \doi{10.1016/j.foodqual.2022.104564}.
#' @examples
#' # b-cluster analysis on the first 8 consumers and the first 5 attributes
#' (b <- bcluster.n(bread$cata[1:8, , 1:5], G=2))
bcluster.n <- function(X, G, M = NULL, measure = "b", max.iter = 500, runs = 1,
                       X.input = "data", tol = exp(-32), seed = 2021){
  bcluster.call <- match.call()
  
  if(!is.null(M) & runs > 1){
    print("Cluster memberships in M will be the starting point for run 1.")
    print("Subsequent runs will use random start.")
    user.yn <- readline("Continue? (y/n): ")
    if(user.yn != "y"){
      return(print("Cluster analysis stopped"))
    }
  } 
  if(!(measure %in% c("b", "Q"))){
    return(print("Value for measure must be one of \"b\" or \"Q\"", 
                 quote = FALSE))
  }
  
  if(measure %in% "b"){
    if(X.input %in% "data"){
      if(length(dim(X)) == 2){
        X <- array(X, c(dim(X)[1], dim(X)[2], 1), 
                   dimnames = 
                     list(dimnames(X)[[1]], dimnames(X)[[2]], "data"))
      }
      nJ <- dim(X)[2]
      X.bc <- barray(X)
    }
    if(X.input %in% "bc"){
      X.bc <- X
      X <- NULL
      nJ <- .findJ(dim(X.bc)[2])
    }
    nI <- dim(X.bc)[1]
    nJJ <- dim(X.bc)[2]
    nM <- dim(X.bc)[4]
  }
  if(measure %in% "Q"){
    if(X.input %in% "data"){
      X.bc <- X
    }
    if(X.input %in% "bc"){
      return(print("b-cluster analysis requires raw data for measure Q"))
    }
  }
  msg <- NULL
  if(length(dim(X.bc)) == 3){
    msg <- "Cluster analysis will proceed assuming there is only 1 response variable"
    if(!is.null(msg)) print(msg)
    X.bc <- array(X.bc, c(dim(X.bc), 1), 
                  dimnames = 
                    list(dimnames(X.bc)[[1]], dimnames(X.bc)[[2]], 
                         dimnames(X.bc)[[3]], "data"))
  }
  
  if(nI <= G){
    # or should I just return cluster memberships 1, 2, ..., nI ??
    return(print(paste0("For non-trivial cluster analysis, the number of clusters (G=", 
                 G, ") must be smaller than number of consumers (", nI, 
                 "). Stopping b-cluster analysis."))    )
  }
  
  if(is.null(dimnames(X.bc)[[1]])) dimnames(X.bc)[[1]] <- 1:nI
  if(is.null(dimnames(X.bc)[[2]])) dimnames(X.bc)[[2]] <- 1:nJJ
  if(is.null(dimnames(X.bc)[[3]])) dimnames(X.bc)[[3]] <- letters[2:3]
  if(is.null(dimnames(X.bc)[[4]])) dimnames(X.bc)[[4]] <- 1:nM
  
  X.bc.dim <- c(nI, nJJ, 2, nM)
  
  if(measure %in% "b"){
    totalB <- sum(abs(X.bc[,,1,] - X.bc[,,2,]))
  }
  if(measure %in% "Q"){
    totalQ <- sum(apply(X, c(1,3), stats::sd) > 0) * (nJ-1)
  }
  
  if(runs > 1){
    if(is.null(M[[1]])){
      Ms <- .startMs(nI, G, seed:(seed+runs-1), M = M, runs = runs)
    } 
    Ls <- lapply(seq_len(ncol(Ms)), function(i) Ms[,i])
    invisible(lapply(Ls, function(m){
      .bclustn(M = unlist(m), X.bc = X.bc, 
               X.bc.dim = X.bc.dim, measure = measure, 
               max.iter = max.iter, G = G, tol = tol)
    }))
  } else {
    if(is.list(M)){ 
      M <- unlist(M, use.names = FALSE)
    } else {
      if(is.null(M[[1]])){
        M <- .getM(nI, G, seed)
      }
    }
    invisible(.bclustn(M = unlist(M), X.bc = X.bc, 
                       X.bc.dim = X.bc.dim, measure = measure, 
                       max.iter = max.iter, G = G, tol = tol))
  }
}

#' Inspect/summarize many b-cluster analysis runs
#'
#' Inspect many runs of b-cluster analysis. Calculate sensory differentiation 
#' retained and recurrence rate.  
#' @name inspect
#' @param X list of multiple runs of b-cluster analysis results from 
#' \code{\link[cata]{bcluster.n}} or \code{\link[cata]{bcluster.h}} 
#' @param G number of clusters (ignored in non-hierarchical b-cluster analysis)
#' @param bestB  total sensory differentiation retained in the best solution. If
#' not provided, then \code{bestB} is determined from best solution in the runs
#' provided (in \code{X}).
#' @param bestM  cluster memberships for best solution. If not provided, then 
#' the best solution is determined from the runs provided (in \code{X}).
#' @param inspect.plot default (\code{TRUE}) plots results from the 
#' \code{\link[cata]{inspect}} function 
#' @return A data frame with unique solutions in rows and the following
#' columns:
#' \itemize{
#' \item{\code{B} : Sensory differentiation retained}
#' \item{\code{PctB} : Percentage of the total sensory differentiation retained}
#' \item{\code{B.prop} : Proportion of sensory differentiation retained compared
#' to best solution}
#' \item{\code{Raw.agree} : raw agreement with best solution}
#' \item{\code{Count} : number of runs for which this solution was observed} 
#' \item{\code{Index} : list index (i.e., run number) of first solution  
#' solution in \code{X} corresponding to this row}}
#' @export
#' @encoding UTF-8
#' @author J.C. Castura
#' @references Castura, J.C., Meyners, M., Varela, P., & Næs, T. (2022). 
#' Clustering consumers based on product discrimination in check-all-that-apply 
#' (CATA) data. \emph{Food Quality and Preference}, 104564. 
#' \doi{10.1016/j.foodqual.2022.104564}.
#' @examples
#' res <- bcluster.n(bread$cata[1:8, , 1:5], G = 3, runs = 3)
#' (ires <- inspect(res))
#' # get index of solution retaining the most sensory differentiation (in these runs)
#' indx <- ires$Index[1]
#' # cluster memberships for solution of this solution
#' res[[indx]]$cluster
inspect <- function(X, G = 2, bestB = NULL, bestM = NULL, inspect.plot = TRUE){
  # Functions
  get.retainedB <- function(y, k){
    lost.B <- sum(y$height[1:(length(y$height)-k+1)])
    retained.B <- rev(y$retainedB)[k]
    tot.B <- retained.B + lost.B
    return(c(retained.B, tot.B))
  }
  get.raw.agreement <- function(x, y){
    tblx <- table(x)
    tblr <- table(y)
    tblxr <- table(x,y)
    return(1 + (sum(tblxr^2) - sum(tblx^2)/2 - sum(tblr^2)/2) /
             choose(length(x), 2))
  }
  # 
  match.rows <- function(df1, df2, tol = 1e-8) {
    sapply(1:nrow(df1), function(i) {
      row1 <- as.numeric(df1[i, ])
      matches <- apply(df2, 1, function(row2){
        all(abs(row1 - as.numeric(row2)) < tol)
      } )
      which(matches)[1]
    })
  }
  # End functions
  n.sol <- length(X)
  if(!is.null(bestM[1])){
    if(length(bestM) != n.sol){
      return(print("Number of consumers in X must be equal to number of cluster  
                   memberships in bestM"))
    }
  }
  continue <- inspection.complete <- FALSE
  if(class(X[[1]]) %in% "hclust"){
    nI <- nrow(X[[1]]$merge) + 1
    # Mmat   :  membership matrix: rows=assessors, columns=runs
    Mmat <- matrix(unlist(lapply(X, stats::cutree, k = G)), nrow = nI, byrow = FALSE)
    # bb     :  sensory differentiation retained
    bb <- matrix(unlist(lapply(X, get.retainedB, G)), nrow = 2, byrow = FALSE)
    continue <- TRUE
  }
  if (class(X[[1]]) %in% "bclust.n"){
    nI <- length(X[[1]]$cluster)
    if(!all(unlist(lapply(X, function(ll){ ll$cluster[1] }))==1)){
      X <- lapply(X, function(this) {
        this$cluster <- .idclusters(this$cluster)
        return(this)
      })
    }
    # Mmat   :  membership matrix: rows=assessors, columns=runs
    Mmat <- matrix(unlist(lapply(X, function(x){
      return(x$cluster)})), nrow = nI, byrow = FALSE)
    # bb     :  sensory differentiation retained
    bb <- matrix(unlist(lapply(X, function(x){ return(c(x$retainedB,
                                                        x$totalB))})), nrow = 2)
    continue <- TRUE
  }
  if(continue){
    if(is.null(bestB)) bestB <- max(bb[1,])
    maxB <- max(bb)
    # bbMmat :  both, rows=runs, columns=assessors
    bbMmat <- cbind(rep(1, n.sol), bb[1,], t(Mmat))
    # all.sol :  unique solutions
    all.sol <- stats::aggregate(bbMmat[,1] ~ ., data = bbMmat, sum)[,-1]
    # all.sol :  order from best to worst
    # all.sol <- all.sol[order(all.sol[,1], decreasing = TRUE), ]
    # order by B, then by count
    all.sol <- all.sol[order(all.sol[,1], 
                             all.sol[, ncol(all.sol)], decreasing = TRUE),]
    # best.sol:  cluster memberships of best solution
    best.sol <- unlist(all.sol[1, -c(1, ncol(all.sol))])
    # rownames.all.sol <- match.rows(all.sol[, -ncol(all.sol)], bbMmat[, -1])
    # this step can be improved later
    if(is.null(bestM)){
      bestM <- best.sol
    } 
    raw.a <- apply(as.matrix(all.sol[, -c(1,ncol(all.sol))]), 1,
                   function(x, ref=bestM){
                     tblx <- table(x)
                     tblr <- table(ref)
                     tblxr <- table(x,ref)
                     return(1 + (sum(tblxr^2) -
                                   sum(tblx^2)/2 -
                                   sum(tblr^2)/2) /
                              choose(length(x), 2)) } )
    all.solutions <-
      cbind(all.sol[,1], 
            100*(round(all.sol[,1] / max(bb)[1], 4)),
            all.sol[,1] / bestB, 
            round(raw.a,4), all.sol[,ncol(all.sol)], 
            match.rows(all.sol[, -ncol(all.sol)], bbMmat[, -1]), #rownames.all.sol, #as.numeric(rownames(all.sol)),
            as.matrix(all.sol[, -c(1, ncol(all.sol))]))
    colnames(all.solutions)[1:6] <- c("B", "PctB", "B.prop", "Raw.agree", 
                                      "Count", "Index")
    colnames(all.solutions)[-c(1:6)] <- paste0("c.", 1:nI)
    rownames(all.solutions) <- NULL
    if(inspect.plot){
      tbl <- all.solutions[, -c(1:6)]
      if(is.null(dim(tbl))){
        tbl <- rbind(tbl)
      }
      Gs <- unlist(apply(tbl, 1, max))
      if (length(unique(Gs)) == 1){
        G <- max(Gs)
        inspect.plot.main <- paste0("Cluster analysis (G=", G, ") from ", 
                                    n.sol, " runs")
      } else {
        # Gs differ
        if(max(Gs) - min(Gs) == 1){ 
          inspect.plot.main.connector <- ", "
        } else {
          inspect.plot.main.connector <- ", ..., "
        }
        inspect.plot.main <- paste0("Cluster analysis (G={", min(Gs), 
                                    inspect.plot.main.connector, 
                                    max(Gs), ") from ", 
                                    n.sol, " runs")
      }
      curr.par <- graphics::par(no.readonly = TRUE) # current parameters
      on.exit(graphics::par(curr.par)) # return to current parameters on exit
      graphics::par(mar=c(7, 4, 4, 2) + 0.1)
      ys <- range(all.sol[,1] / bestB)
      if(length(ys) == 1){
        ys <- rep(ys, 2)
      }
      plot(x = c(1,0), y = ys, type="n", xlim = c(1,0),
           main = inspect.plot.main,
           ylab = "Sensory Differentiation Retained (B)",
           xlab = "Proportion of runs with this B or higher",
           sub = paste0("Labels indicate % agreement with best solution"))
      graphics::text(x = cumsum(all.sol[,ncol(all.sol)]) /
                       sum(all.sol[,ncol(all.sol)]),
                     y = all.sol[,1]/bestB,
                     labels = as.character(round(raw.a,2)*100), srt=45)
    }
    inspection.complete <- TRUE
  }
  if(!inspection.complete){
    return("Input must be a list of b-cluster analysis results (many runs)")
  } else {
    num.best <- length(all.solutions[,1] == all.solutions[1,1])
    if(num.best > 1){
      cat("The best solution from these ", n.sol, " runs (row 1) ",
          "is compared with the solutions found in other runs.", 
          "\n", sep = "")
    }
    all.solutions <- as.data.frame(all.solutions)
    all.solutions[,6] <- as.integer(all.solutions[,6])
    return(all.solutions[,1:6])
  }
}