R/randomise.R

Defines functions .getShannonVector .getShannonMatrix .shannon .getPerms .KirchhoffIndex randomize randomise

Documented in randomise randomize

#' Randomised group of items
#'
#' Set a randomised group of items for crowdsourcing citizen science.
#' Generate designs for ranking of options. It is designed for tricot trials 
#' specifically (comparing 3 options), but it will also work with comparisons 
#' of any other number of options. 
#' The design strives for approximate A optimality, this means that it is robust 
#' to missing observations. It also strives for balance for positions of each option.
#' Options are equally divided between first, second, third, etc. position. 
#' The strategy is to create a "pool" of combinations that does not repeat 
#' combinations and is A-optimal. Then this pool is ordered to make subsets of 
#' consecutive combinations also relatively balanced and A-optimal
#' 
#' @author Jacob van Etten
#' @param ncomp an integer for the number of items each observer compares
#' @param nobservers an integer for the number of observers
#' @param nitems an integer for the number of items tested in the project
#' @param itemnames a character for the name of items tested in the project
#' @return A dataframe with the randomised design
#' @examples 
#' ni <- 3
#' no <- 10
#' nv <- 4
#' inames <- c("mango","banana","grape","apple")
#'  
#' randomise(ncomp = ni,
#'           nobservers = no,
#'           nitems = nv,
#'           itemnames = inames)
#'           
#' @aliases randomize
#' @importFrom Matrix Diagonal
#' @importFrom methods as
#' @importFrom RSpectra eigs
#' @importFrom utils combn
#' @importFrom tibble as_tibble
#' @export
randomise <- function(ncomp = 3, nobservers = NULL, nitems = NULL, 
                      itemnames = NULL) {
  
  if (is.null(nobservers)) {
    stop("nobservers is missing with no default")
  }
  
  if (is.null(nitems)) {
    stop("nitems is missing with no default")
  }
  
  if (is.null(itemnames)) {
    stop("itemnames is missing with no default")
  }
  
  if (nitems != length(itemnames)) {
    stop("nitems is different than provided itemnames")
  }
  
  if (nitems < 3) {
    stop("nitems must be higher than 2")
  }
  
  # Varieties indicated by integers
  varieties <- seq_len(nitems)
  
  # Full set of all combinations
  varcombinations <- t((utils::combn(varieties, ncomp)))
  
  # if the full set of combinations is small and can be covered at least once
  # the set will include each combination at least once
  ncomb <- dim(varcombinations)[1]
  n <- floor(nobservers / ncomb)
  nfixed <- ncomb * n
  vars1 <-
    varcombinations[c(rep(1:(dim(varcombinations)[1]), times = n)), ]
  
  # the remaining combinations are sampled randomly but in a balanced way
  # this means that no combination enters more than once
  nremain <- nobservers - nfixed
  
  # create set to get to full number of observers
  vars2 <- matrix(nrow = nremain, ncol = ncomp)
  
  # set up array with set of combinations
  varcomb <- matrix(0, nrow = nitems, ncol = nitems)
  
  if (dim(vars2)[1] > 0.5) {
    
    # select combinations for the vars2 set that optimize design
    for (i in 1:nremain) {
      
      # calculate frequency of each variety
      sumcomb <- rowSums(varcomb) + colSums(varcomb)
      
      # priority of each combination is equal to Shannon index of varieties 
      # in each combination
      prioritycomb <- apply(varcombinations, 1, function(x){ 
        .getShannonVector(x, sumcomb, nitems)
      })
      
      # highest priority to be selected is the combination which has the lowest 
      # Shannon index
      selected <- which(prioritycomb == min(prioritycomb))
      
      # if there are ties, find out which combination reduces Kirchhoff index most
      
      if (length(selected) > 1 & i > 25) {
        
        reduce <- max(2, min(10, round(5000/nobservers), round(200/nitems)))
        
        # randomly subsample from selected if there are too many combinations to check
        if (length(selected) > reduce) {
          selected <- sample(selected, reduce)
        }
        
        # get a nitems x nitems matrix with number of connections
        
        # calculate Kirchhoff index and select smallest value
        khi <- vector(length = length(selected))
        for (k in 1:length(selected)) {
          
          evalgraph <- varcomb
          index <- t(combn(varcombinations[selected[k],], 2))
          evalgraph[index] <- evalgraph[index] + 1
          khi[k] <- .KirchhoffIndex(evalgraph)
          
        }
        
        selected <- selected[which(khi == min(khi))]
        
      }
      
      # if there are still ties between ranks of combinations, selected randomly 
      # from the ties
      if (length(selected) > 1) { 
        selected <- sample(selected, 1)
      }
      
      # assign the selected combination
      vars2[i,] <- varcombinations[selected,]
      
      varcomb[t(combn(varcombinations[selected,],2))] <- 
        varcomb[t(combn(varcombinations[selected,],2))] + 1
      
      # remove used combination
      varcombinations <- varcombinations[-selected,]
      
    }
  }
  
  # merge vars1 and vars2 to create the full set of combinations
  vars <- rbind(vars1, vars2)
  
  # create empy object to contain ordered combinations of vars
  varOrdered <- matrix(NA, nrow = nobservers, ncol = ncomp)
  
  # set up array with set of combinations
  varcomb <- matrix(0, nrow = nitems, ncol = nitems)
  
  # fill first row
  selected <- sample(1:nobservers, 1)
  varcomb[t(combn(vars[selected,],2))] <- 1
  varOrdered[1,] <- vars[selected,]
  vars <- vars[-selected,]
  
  # optimize the order of overall design by repeating a similar procedure to the above
  for (i in 2:(nobservers-1)) {
    
    # calculate frequency of each variety
    sumcomb <- rowSums(varcomb) + colSums(varcomb)
    
    # priority of each combination is equal to Shannon index of varieties 
    # in each combination
    prioritycomb <- apply(vars, 1, function(x){ 
      .getShannonVector(x, sumcomb, nitems)
    })
    
    # highest priority to be selected is the combination which has the 
    # lowest Shannon index
    selected <- which(prioritycomb == min(prioritycomb))
    
    # if there are ties, find out which combination reduces Kirchhoff index most
    if (length(selected) > 1 & i > 25) {
      
      reduce <- max(2, min(10, round(5000/nobservers), round(200/nitems)))
      
      # randomly subsample from selected if there are too many combinations to check
      if (length(selected) > reduce) {
        selected <- sample(selected, reduce)
      }
      
      # get a nitems x nitems matrix with number of connections
      sumcombMatrix <- varcomb * 0
      
      # in this case, get matrix to calculate Kirchhoff index only for 
      # last 10 observers
      for (j in max(1,i-10):(i-1)) {
        index <- t(combn(varOrdered[j,],2))
        sumcombMatrix[index] <- sumcombMatrix[index] + 1
        
      }
      
      # calculate Kirchhoff indices for the candidate matrix corresponding
      # to each row in selected
      khi <- vector(length = length(selected))
      for (k in 1:length(selected)) {
        
        evalgraph <- sumcombMatrix
        index <- t(combn(vars[selected[k],], 2))
        evalgraph[index] <- evalgraph[index] + 1
        khi[k] <- .KirchhoffIndex(evalgraph)
        
      }
      
      # select combination that produces the lowest Kirchhoff index
      selected <- selected[which(khi == min(khi, na.rm=TRUE))]
      
    }
    
    # if there are still ties between ranks of combinations, select one randomly
    if (length(selected) > 1) { 
      selected <- sample(selected, 1) 
    }
    
    # assign the selected combination
    varOrdered[i,] <- vars[selected,]
    varcomb[t(combn(vars[selected,],2))] <- varcomb[t(combn(vars[selected,],2))] + 1
    
    # remove used combination
    vars <- vars[-selected, ]
    
  }
  
  # assign last one
  varOrdered[nobservers,] <- vars
  
  # Equally distribute positions to achieve order balance
  # First create matrix with frequency of position of each of nitems
  position <- matrix(0, ncol = ncomp, nrow = nitems)
  
  # Sequentially reorder sets to achieve evenness in positions
  # Shannon is good here, because evenness values are proportional
  # the H denominator in the Shannon formula is the same
  
  for (i in 1:nobservers) {
    
    varOrdered_all <- .getPerms(varOrdered[i,])
    varOrdered_Shannon <- apply(varOrdered_all, 1, function(x) {
      .getShannonMatrix(x, position)
    })
    varOrdered_i <- varOrdered_all[which(varOrdered_Shannon == 
                                           min(varOrdered_Shannon))[1],]
    varOrdered[i,] <- varOrdered_i
    pp <- position * 0
    pp[cbind(varOrdered_i,1:ncomp)] <- 1
    position <- position + pp
    
  }
  
  # The varOrdered matrix has the indices of the elements
  # Create the final matrix
  finalresults <- matrix(NA, ncol = ncomp, nrow = nobservers)
  
  # loop over the rows and columns of the final matrix and put
  # the elements randomized
  # with the indexes in varOrdered
  for (i in seq_len(nobservers)){
    for (j in seq_len(ncomp)){
      finalresults[i,j] <- itemnames[varOrdered[i,j]]
    }
  }
  
  dimnames(finalresults) <- list(seq_len(nobservers), 
                                 paste0("item_", LETTERS[1:ncomp]))
  
  finalresults <- tibble::as_tibble(finalresults)
  
  return(finalresults)
  
}

#' @inheritParams randomise
#' @export
randomize <- function(...){
  
  randomise(...)
  
}

# Define function for Kirchhoff index
# This index determines which graph is connected in the most balanced way
# In this context, lower values (lower resistance) is better
.KirchhoffIndex <- function(x) {
  # The input matrix only has one triangle filled
  # First we make it symmetric
  x <- x + t(x)
  
  # Then some maths to get the Kirchhoff index
  
  # Using rARPACK:eigs, setting k to n-1 because we don't need the 
  # last eigen value
  Laplacian <- methods::as(Matrix::Diagonal(x = colSums(x)) - x, 
                           "dsyMatrix")
  lambda <-
    try(RSpectra::eigs(Laplacian,
                       k = (dim(Laplacian)[1] - 1),
                       tol = 0.01,
                       retvec = FALSE)$values, silent = TRUE)
  if (inherits(lambda, "try-error"))
    lambda <- Inf
  # RSpectra:eigs is faster than base:eigen
  # lambda <- eigen(Laplacian)$values
  # lambda <- lambda[-length(lambda)]
  
  return(sum(1 / lambda))
  
}

# get all permutations
.getPerms <- function(x) {
  if (length(x) == 1) {
    return(x)
  }
  else {
    res <- matrix(nrow = 0, ncol = length(x))
    for (i in seq_along(x)) {
      res <- rbind(res, cbind(x[i], Recall(x[-i])))
    }
    return(res)
  }
}

# Shannon (as evenness measure)
.shannon <- function(x){
  sum(ifelse(x == 0, 0, x * log(x)))
}

# Get Shannon index for order positions
.getShannonMatrix <- function(x, position) {
  pp <- position * 0
  pp[cbind(x, 1:length(x))] <- 1
  pp <- position + pp
  return(.shannon(as.vector(pp)))
  
}

.getShannonVector <- function(x, sumcomb, nitems) {
  xi <- rep(0, times = nitems)
  xi[x] <- 1
  return(.shannon(sumcomb + xi))
  
}
kauedesousa/ClimMobTools documentation built on March 21, 2020, 5:41 p.m.