R/bootcoldist.R
In pavo: Perceptual Analysis, Visualization and Organization of Spectral Colour Data

Documented in bootcoldist

#' Bootstrap colour distance confidence intervals
#'
#' Uses a bootstrap procedure to generate confidence intervals
#'  for the mean colour distance between two or more samples of colours
#'
#' @param vismodeldata (required) quantum catch colour data.
#'  Can be the result from [vismodel()], or [colspace()]. Data may also be
#'  independently calculated quantum catches, in the form of a data frame with
#'  columns representing photoreceptors.
#' @param by (required) a numeric or character vector indicating the group to which each row from
#'  the object belongs to.
#' @param boot.n number of bootstrap replicates (defaults to 1000)
#' @param alpha the confidence level for the confidence intervals (defaults to 0.95)
#' @param raw should the full set of bootstrapped distances (equal in length to boot.n)
#' be returned, instead of the summary distances and CI's? Defaults to FALSE.
#' @param ... other arguments to be passed to [coldist()]. Must at minimum
#' include `n` and `weber`. See [coldist()] for details.
#'
#' @inherit getspec details
#'
#' @return a matrix including the empirical mean and bootstrapped
#'  confidence limits for dS (and dL if `achromatic = TRUE`), or a data.frame
#'  of raw bootstraped dS (and dL if `achromatic = TRUE`) values equal in length to boot.n.
#'
#' @examples
#' \donttest{
#' # Run the receptor-noise limited model, using the visual phenotype
#' # of the blue tit
#' data(sicalis)
#' vm <- vismodel(sicalis, achromatic = "bt.dc", relative = FALSE)
#' gr <- gsub("ind..", "", rownames(vm))
#' bootcoldist(vm, by = gr, n = c(1, 2, 2, 4), weber = 0.1, weber.achro = 0.1)
#'
#' # Run the same again, though as a simple colourspace model
#' data(sicalis)
#' vm <- vismodel(sicalis, achromatic = "bt.dc")
#' space <- colspace(vm)
#' gr <- gsub("ind..", "", rownames(space))
#' bootcoldist(space, by = gr)
#'
#' # Estimate bootstrapped colour-distances for a more 'specialised' model,
#' # like the colour hexagon
#' data(flowers)
#' vis.flowers <- vismodel(flowers,
#'   visual = "apis", qcatch = "Ei", relative = FALSE,
#'   vonkries = TRUE, achromatic = "l", bkg = "green"
#' )
#' flowers.hex <- colspace(vis.flowers, space = "hexagon")
#' pop_group <- c(rep("pop_1", nrow(flowers.hex) / 2), rep("pop_2", nrow(flowers.hex) / 2))
#' bootcoldist(flowers.hex, by = pop_group)
#' }
#'
#' @export
#' @importFrom future.apply future_lapply
#' @importFrom progressr with_progress progressor
#' @importFrom stats aggregate setNames
#'
#' @references Maia, R., White, T. E., (2018) Comparing colors using visual models.
#'  Behavioral Ecology, ary017 \doi{10.1093/beheco/ary017}

bootcoldist <- function(vismodeldata, by, boot.n = 1000, alpha = 0.95, raw = FALSE, ...) {

  # Define an inner function to calculate the geometric mean
  gmean <- function(x, na.rm = TRUE, zero.propagate = FALSE) {
    # If any of the values are negative, return NaN
    if (any(x < 0, na.rm = TRUE)) {
      return(NaN)
    }
    # If zero.propagate is TRUE and there are zeros in x, return 0
    if (zero.propagate) {
      if (any(x == 0, na.rm = TRUE)) {
        return(0)
      }
      # Otherwise, return the geometric mean
      exp(mean(log(x), na.rm = na.rm))
    } else {
      # If zero.propagate is FALSE, calculate the geometric mean excluding zeros
      exp(sum(log(x[x > 0]), na.rm = na.rm) / length(x))
    }
  }

  # Convert any non-numeric columns to (nonsense) numeric values
  # They're not used anyway, and subsetting etc. strips attributes,
  # so this is just a simple/lazy workaround
  num_cols <- unlist(lapply(vismodeldata, is.numeric))
  vismodeldata[, !num_cols] <- 0

  # Rescale any x-y-z positional data by adding a constant
  # Only applies to colspace objects, since (unlike for RN data) colour distances
  # are calculated based on coordinates, rather than qcatches. But this
  # caused problems when calculating geometric means, because coordinate
  # data often contain negative values. So this re-scales all coordinate
  # systems so they can never be negative, without affecting
  # the distances between points.
  vismodeldata[intersect(names(vismodeldata), c("x", "y", "z"))] <-
    vismodeldata[intersect(names(vismodeldata), c("x", "y", "z"))] + 100

  # Start preparing the arguments

  arg0 <- list(...)

  # Check for achromatic argument. 'achromatic' was previously called 'achro', 
  # so this handles backward compatibility.
  # TODO: add a warning about this so users update their scripts??
  if (is.null(arg0$achromatic)) {
    arg0$achromatic <- arg0$achro
  }

  # Determine if RN model should be used. 
  # For non-colspace objects, the RN model is required.
  useRNmodel <- !inherits(vismodeldata, "colspace")

  if (is.null(arg0$achromatic)) {
    if (is.null(attr(vismodeldata, "visualsystem.achromatic"))) {
      stop('argument "achromatic" to be passed to "coldist" is missing', call. = FALSE)
    }

    if (attr(vismodeldata, "visualsystem.achromatic") == "none") {
      arg0$achromatic <- FALSE
    } else {
      arg0$achromatic <- TRUE
    }
  }

  # Only require n & webers if using RN model
  if (useRNmodel) {
    # Receptor density
    if (is.null(arg0$n)) {
      stop('argument "n" to be passed to "coldist" is missing', call. = FALSE)
    }
    # Chromatic weber fraction
    if (is.null(arg0$weber)) {
      stop('argument "weber" to be passed to "coldist" is missing', call. = FALSE)
    }
    # Noise type
    if (is.null(arg0$noise)) {
      arg0$noise <- "neural"
    }
    # Weber cone ref
    if (is.null(arg0$weber.ref)) {
      arg0$weber.ref <- "longest"
    }
    if (arg0$achromatic) {
      if (is.null(arg0$weber.achro)) {
        stop('argument "weber.achro" to be passed to "coldist" is missing', call. = FALSE)
      }
    }
  } else {
    arg0$weber <- NULL
    arg0$n <- NULL
    arg0$noise <- NULL
    arg0$weber.ref <- NULL
    arg0$weber.achro <- NULL
  }
  
  # Check if qcatch attribute exists, if not then stop the function with an error
  if (is.null(arg0$qcatch)) {
    if (is.null(attr(vismodeldata, "qcatch"))) {
      stop('argument "qcatch" to be passed to "coldist" is missing', call. = FALSE)
    }
    arg0$qcatch <- attr(vismodeldata, "qcatch")
  }
  
  # Reorder the visual model data by group
  sortinggroups <- order(by)
  vismodeldata <- vismodeldata[sortinggroups, ]
  by <- by[sortinggroups]

  # Sample sizes for each group
  samplesizes <- table(by)

  # Group-wise geometric mean deltaS for the empirical data
  empgroupmeans <- aggregate(vismodeldata, list(by), gmean, simplify = TRUE)
  row.names(empgroupmeans) <- empgroupmeans[, 1]
  empgroupmeans <- empgroupmeans[, -1]

  # Set the attributes for the grouped means
  datattributes <- grep("names", names(attributes(vismodeldata)),
    invert = TRUE, value = TRUE, fixed = TRUE
  )

  # Prepare empirical argument list and calculate empirical color distances
  attributes(empgroupmeans)[datattributes] <- attributes(vismodeldata)[datattributes]
  
  # Begin bootstrapping procedure
  # This involves sampling with replacement from the original data (group-wise), calculating 
  # group-wise means for each bootstrap sample, and then performing the color distance calculation for each.
  # After obtaining the bootstrap color distance calculations, calculate confidence intervals and 
  # possibly return raw bootstrap results.
  emparg <- arg0
  emparg$modeldata <- empgroupmeans

  empcd <- do.call(coldist, emparg)

  empdS <- setNames(empcd$dS, paste(empcd$patch1, empcd$patch2, sep = "-"))

  # separate data by group
  bygroup <- lapply(unique(by), function(x) vismodeldata[by == x, ])

  # split(dat, by) also works but is about twice as slow
  names(bygroup) <- unique(by)

  # create vectors of indices to sample
  its <- lapply(samplesizes, function(x) sample(seq_len(x), x * boot.n, replace = TRUE))

  # use the indices from its to sample from the data
  # returns a list with length = number of by
  # and rows = (sample size for that group) * (the number of bootstrap replicates) in each
  bootsamples <- lapply(seq_along(bygroup), function(x) bygroup[[x]][its[[x]], ])
  
  # next, split by bootstrap replicate
  # preserving the same sample size as that original group had
  #
  # list with length = number of by
  # and values = index for the bootstrap replicate that sample belongs to
  bootindex <- lapply(samplesizes, function(x) as.character(rep(seq_len(boot.n), each = x)))

  # use the index to break samples into bootstrap replicates
  # returns a list with length = number of by
  # each entry is itself a list with length = number of replicates
  bootbygroup <- lapply(seq_along(bygroup), function(x) {
    lapply(unique(bootindex[[x]]), function(z) bootsamples[[x]][bootindex[[x]] == z, ])
  })

  # now take the column means for all bootstrapped by
  # returns a list with length = number of by
  # each row in these = the (geometric) mean of bootstrap replicates
  groupcolmeans <- lapply(bootbygroup, function(z) {
    do.call(rbind, lapply(z, function(x) apply(x, 2, gmean))) # nolint
  })

  # now "split and merge"
  # creating a list with length = number of bootstrap replicates
  # and rows in each entry = mean per group in that replicate
  bootgrouped <- lapply(seq_len(boot.n), function(x) {
    do.call(rbind, lapply(groupcolmeans, "[", x, )) # nolint: false positive
  })

  # ...name the rows by group
  bootgrouped <- lapply(bootgrouped, function(x) {
    row.names(x) <- unique(by)
    x
  })

  # ...and give them the necessary attributes
  bootgrouped <- lapply(bootgrouped, as.data.frame)
  attribs <- attributes(vismodeldata)
  attribs <- attribs[grep("data|names", names(attribs), invert = TRUE)]

  for (i in seq_along(bootgrouped)) {
    attributes(bootgrouped[[i]])[names(attribs)] <- attribs
  }
  
  # Creates temporary bootstrap function, which is applied to each bootstrap sample
  tmpbootcdfoo <- function(x) {
    tmparg <- arg0
    tmparg$modeldata <- x
    do.call(coldist, tmparg)
  }
  
  # Handles progress bar for running bootstrap calculations
  with_progress({
    p <- progressor(along = bootgrouped)
    bootcd <- future_lapply(bootgrouped, function(z) {
      p()
      tryCatch(suppressMessages(tmpbootcdfoo(z)),
        error = function(e) NULL
      )
    }, future.seed = TRUE)
  })

  # Extract deltaS values from bootcd and restructure in one dataframe
  bootdS <- do.call(
    rbind,
    lapply(bootcd, function(x) {
      setNames(x$dS, paste(x$patch1, x$patch2, sep = "-"))
    })
  )

  # Error handling: if the dimension of bootdS is less than boot.n, 
  # stop function and print error message
  if (dim(bootdS)[1] < boot.n) {
    stop("Bootstrap sampling encountered errors.")
  }

  # Order, find quantiles, and set up deltaS confidence intervals
  quantileindices <- round(boot.n * ((1 + c(-alpha, alpha)) / 2))
  bootdS <- apply(bootdS, 2, sort)
  dsCI <- bootdS[quantileindices, , drop = FALSE]
  rownames(dsCI) <- c("dS.lwr", "dS.upr")

  # Ensure names match with empirical values (even though they should match already)
  dsCI <- dsCI[, names(empdS), drop = FALSE]

  # Define empirical deltaS mean
  dS.mean <- empdS

  # Combine empirical and bootstrap deltaS statistics into a results dataframe
  res <- t(rbind(dS.mean, dsCI))

  # If raw = TRUE, create a new dataframe with raw bootstrapped deltaS distances
  if (raw) {
    rawres <- as.data.frame(bootdS)
    names(rawres) <- paste0(names(rawres), "_dS")
  }

  # If achromatic = TRUE, calculate empirical and bootstrap statistics for achromatic distances (deltaL)
  if (arg0$achromatic) {
    empdL <- setNames(empcd$dL, paste(empcd$patch1, empcd$patch2, sep = "-"))

    bootdL <- do.call(
      rbind,
      lapply(bootcd, function(x) {
        setNames(x$dL, paste(x$patch1, x$patch2, sep = "-"))
      })
    )

    bootdL <- apply(bootdL, 2, sort)
    dlCI <- bootdL[quantileindices, , drop = FALSE]
    rownames(dlCI) <- c("dL.lwr", "dL.upr")

    # Ensure names match with empirical values (even though they should match already)
    dlCI <- dlCI[, names(empdL), drop = FALSE]
    
    # Define empirical deltaL mean
    dL.mean <- empdL
    
    # Combine empirical and bootstrap deltaL statistics into a results dataframe
    res <- cbind(res, t(rbind(dL.mean, dlCI)))

    # If raw = TRUE, create a new dataframe with raw bootstrapped deltaL distances
    if (raw) {
      bootdL <- as.data.frame(bootdL)
      names(bootdL) <- paste0(names(bootdL), "_dL")
      rawres <- cbind(rawres, bootdL)
    }
  }

  if (raw) {
    res <- rawres
  }

  res
}

Any scripts or data that you put into this service are public.

pavo documentation built on Sept. 24, 2023, 5:06 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

pavo
Perceptual Analysis, Visualization and Organization of Spectral Colour Data

R/bootcoldist.R
In pavo: Perceptual Analysis, Visualization and Organization of Spectral Colour Data

Defines functions bootcoldist

Documented in bootcoldist

Try the pavo package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

pavo Perceptual Analysis, Visualization and Organization of Spectral Colour Data

R/bootcoldist.R In pavo: Perceptual Analysis, Visualization and Organization of Spectral Colour Data

Defines functions bootcoldist

Documented in bootcoldist

Try the pavo package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

pavo
Perceptual Analysis, Visualization and Organization of Spectral Colour Data

R/bootcoldist.R
In pavo: Perceptual Analysis, Visualization and Organization of Spectral Colour Data