#' Catch Per Unit Effort simulator
#'
#' Estimates the relationship between a denominator and a numerator variable over time
#'
#' @param catch Data table (or object that can be coerced to one) with, minimally, two numeric columns called Start and End.
#' @param effort Data table (or object that can be coerced to one) with, minimally, two numeric columns called Start and End.
#' @param wt.catch Numeric vector: the weight to be applied to each row in `catch', or a constant weight to be applied to all.
#' Defaults to 1.
#' @param wt.effort Numeric vector: the weight to be applied to each row in `effort`, or a constant weight to be applied to all.
#' Defaults to 1.
#' @param context.fields Character vector specifying the column(s) in data which define the minimal stratigraphic entities to analyse.
#' Defaults to "ID".
#' @param UoA Unit of Analysis: character vector of names of additional columns by which to group data when aggregating weights,
#' on top of those specified in context.field. For example, should different taxa be lumped together when analysing bone remains
#' from a table of contexts? Defaults to NULL.
#' @param quant.list Numeric vector of quantiles to be calculated in a summary table. Defaults to c(0.025,0.25,0.5,0.75,0.975).
#' @param start.date Numeric: the start of time period to be considered. Defaults to lowest value in data$Start.
#' @param end.date Numeric: the end of time period to be considered. Defaults to highest value in data$End.
#' @param bin.width Numeric: the resolution of the analysis, in units of time. Defaults to 100.
#' @param reps Integer: the number of times the simulation will be run. Defaults to 100.
#' @param RoC Rate of Change. Logical: should rates of change between adjacent bins be calculated alongside the raw counts?
#' @param small.n Numeric: vector specifying one or more cut-off values to be used to flag periods of where the underlying sample size (i.e.
#' magnitude of effort) is small. If multiple values are passed they should be in descending order.
#' @return Minimally, a list with two named elements:
#' "full" is a data table with six or seven columns: 'rep.no', integer specifying simulation run; 'bin', character specifying
#' chronological bin in terms of date range; 'bin.no' integer specifying number of bin, counting from earliest; 'catch', giving the
#' simulated frequency of 'catch'; and 'effort', giving the simulated magnitude of 'effort'. If RoC=TRUE there will be an additional
#' column, 'RoC', giving the rate of change in cpue between the given bin and the next.
#' "summary" is a second long format data table with four named columns: 'bin', as above; 'V1', the relevant value for the given bin
#' at a given quantile; 'quantile', the quantile at which V1 is calculated; 'id', character specifying which column from "full" V1 is
#' based on: "cpue" or "RoC" (catch and effort are ignored when summarising).
#' If one or more values are passed to 'small.n', then this list will have a third element, "boxes". This is a list of length equal
#' to the length of small.n. Each component is a list of four-row data frames giving coordinates that define boxes around periods
#' where the simulated value of effort is below the corresponding value in small.n, set up for plotting using grey.zones (either
#' alone or within any of the main archSeries plotting functions).
#' @export
#' @import data.table
#' @examples
#' dates <- data.table(Start=c(450, 450, 600), End=c(700, 800, 650), frags=c(3,6,2), vol=c(40, 40, 40))
#' x <- cpue(dates, dates, dates$frags, dates$vol, context.fields=NULL, small.n=1, reps=1000)
cpue <- function(catch, effort, wt.catch=1, wt.effort=1, context.fields=c("ID"), UoA=NULL, quant.list=c(0.025, 0.25, 0.5, 0.75, 0.975),
start.date=NULL, end.date=NULL, bin.width=100, reps=100, RoC=FALSE, small.n=NULL) {
End <- Start <- breaks <- sim <- bin <- bin.no <- NULL
#Tidy up input data and apply filters
catch <- data.table(cbind(catch, wt.catch)) #appends weights to list of date ranges, recycling if necessary (e.g. for uniform weight)
effort <- data.table(cbind(effort, wt.effort)) #appends weights to list of date ranges, recycling if necessary (e.g. for uniform weight)
#Read start and end dates from input data if not specified
if(is.null(start.date)) {
start.date <- min(catch$Start, effort$Start)
}
if(is.null(end.date)) {
end.date <- max(catch$End, effort$End)
}
catch <- catch[End >= start.date & Start <= end.date] #excludes ranges that fall entirely outside the study period
effort <- effort[End >= start.date & Start <= end.date] #excludes ranges that fall entirely outside the study period
#Aggregate data
catch <- catch[, j=list(wt.catch=sum(as.numeric(wt.catch))), by=c(context.fields, "Start", "End", UoA)]
effort <- effort[, j=list(wt.effort=sum(as.numeric(wt.effort))), by=c(context.fields, "Start", "End")]
if(length(wt.catch)==1) {catch[, wt.catch := 1]}
if(length(wt.effort)==1) {effort[, wt.effort := 1]}
data <- merge(catch, effort, by=c(context.fields, "Start", "End"), all=TRUE)
#Set up breaks and labels
breaks <- seq(start.date, end.date, bin.width) #sets breaks
labels <- numeric(0)
for(i in 1:(length(breaks)-1 )) {
labels[i] <- paste(breaks[i], breaks[i + 1], sep="-") #sets bin labels
}
#Perform simulation
rep.no <- rep(1:reps, each=nrow(data))
data <- cbind(rep.no, data) #recycles input data 'reps' times to provide frame for simulation
data[, sim := {x <- runif(nrow(data)); (x * (data[, End] - data[, Start])) + data[, Start]}] #simulates a date for each row
data[, bin := cut(sim, breaks, labels=labels)] #finds the relevant bin for each simulated date
#Prepare results table
catch <- data[is.na(wt.catch)==FALSE & is.na(bin)==FALSE, j=list(catch=sum(as.numeric(wt.catch))), by=list(rep.no, bin)] #sums weights by bin and rep number
effort <- data[is.na(wt.effort)==FALSE & is.na(bin)==FALSE, j=list(effort=sum(as.numeric(wt.effort))), by=list(rep.no, bin)] #sums weights by bin and rep number
frame <- data.table(rep.no=rep(1:reps, each=length(labels)), bin.no=rep(1:length(labels), reps), bin=rep(labels, reps))
results <- merge(frame, catch, by=c("rep.no", "bin"), all=TRUE)
results <- merge(results, effort, by=c("rep.no", "bin"), all=TRUE)
results[, cpue := catch / effort]
results[is.na(results)] <- 0
results <- results[order(rep.no, bin.no)]
#Calculate rates of change, if necessary
if(RoC==TRUE) {
results[, RoC := c(diff(cpue), NA)]
results[bin==labels[length(labels)], RoC := NA]
RoC <- "RoC"
} else {RoC <- NULL}
#Create summary dataset
summary <- sim.summ(results, summ.col=c("cpue", RoC), quant.list=quant.list)
#Define small-n polygons if required
if(length(small.n > 0)) {
n <- results[,median(effort), by="bin"]
ylim <- max(results$cpue*1.1)
boxes <- list()
for(i in 1:length(small.n)) {
weak <- c(FALSE, n$V1 < small.n[i], FALSE)
weak.x <- rep((0:length(labels))[diff(weak)==1|diff(weak)==-1], each=2)+0.5
weak.y <- rep(c(-1,ylim, ylim, -1), length(weak.x)/4)
coords <- data.frame(cbind(weak.x, weak.y))
coords <- split(coords, rep(1:(nrow(coords)/4), each=4))
boxes[[i]] <- coords
}
}
#Return results
results <- list(full=results, summary=summary)
if(length(small.n)>0) {results$small.n <- boxes}
results
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.