# R/sample_by_individuals.R

#' Subsample a dataset by individuals
#'
#' Subsample a dataset so we only count some of the individuals - either
#' sampling until a fixed limit is met, or sampling a fraction of total,
#' simulating a fractional sampling effort or over a fraction of the
#' area of each subcommunity.
#'
#' Each column of `dataset` is treated as one subcommunity and is subsampled
#' independently of the others. If every value in `dataset` is 0 or 1 the
#' data are treated as incidence data and the retained presences are drawn
#' without replacement; otherwise the data are treated as abundance data and
#' the retained individuals are drawn with `rmultinom()`, using the observed
#' relative abundances as probabilities.
#'
#' At least one of `count` and `fraction` must be supplied. If both are
#' given, `count` is used and `fraction` is ignored.
#'
#' @param dataset A data frame containing abundance or incidence data
#' @param count The number of individuals to retain (or fewer if fewer present)
#' @param fraction The fraction of individuals in each subcommunity to retain.
#'   For incidence data the target is instead
#'   `round(nrow(dataset) * fraction)` presences per column, and a warning
#'   is issued.
#' @return The subsampled dataset as a tibble
#'
#' @export
#'
#' @examples
#' library(iNEXT)
#' data(bird)
#' sample_by_individuals(bird, count = 100)
#'
#' data(ciliates)
#' eto.incidence <- ciliates$EtoshaPan
#' sample_by_individuals(eto.incidence, fraction = 0.1)
#'
sample_by_individuals <- function(dataset, count, fraction)
{
  if (missing(count) && missing(fraction))
    stop("Must include either count or fraction to sample.")
  # Record this once: missing(count) stops being TRUE as soon as `count` is
  # assigned, so it cannot be re-tested reliably further down.
  use_fraction <- missing(count)
  subsample <- as.data.frame(dataset)
  rows <- nrow(subsample)
  # Flatten the columns before testing: %in% applied to a data frame compares
  # whole columns (deparsed to strings), not the individual cell values.
  if (all(unlist(subsample, use.names = FALSE) %in% c(0, 1)))
  { # Incidence data
    if (use_fraction)
    {
      warning("Cannot sample for a fraction of the total number of species present for incidence data, as this is unknown. Sampling for a fraction of total species instead.")
      count <- round(rows * fraction)
    }
    for (col in seq_along(subsample))
    {
      present <- which(subsample[[col]] == 1)
      n_keep <- min(count, length(present))
      # Draw positions with sample.int() and index into `present`:
      # sample(x, n) would sample from 1:x when `present` has length one.
      kept <- present[sample.int(length(present), n_keep)]
      subsample[[col]] <- rep(0, rows)
      subsample[[col]][kept] <- 1
    }
  } else { # Abundance data
    for (col in seq_along(subsample))
    {
      vec <- subsample[[col]]
      total <- sum(vec)
      # The fractional target depends on this column's own total, so it is
      # recomputed for every subcommunity.
      n_keep <- if (use_fraction) round(total * fraction) else count
      # Columns that already hold no more than the target are left untouched.
      if (n_keep < total)
        subsample[[col]] <- as.vector(rmultinom(1, n_keep, vec / total))
    }
  }
  tibble::as_tibble(subsample)
}
# boydorr/sampling documentation built on May 23, 2019, 1:45 p.m.