#' @title Abundance point estimates
#'
#' @description Estimate abundance given a distance function,
#' a "merged" data frame containing detections and transect lengths, area,
#' and the number of sides surveyed (if line-transects).
#' This is called internally by \code{abundEstim}. Most users will call
#' \code{abundEstim} to estimate abundance.
#'
#' @param dfunc An estimated distance function (see \code{dfuncEstim}).
#'
#' @param data A data frame containing distance observations, transects,
#' and lengths. This data frame must have a column named 'siteID' that identifies
#' unique sites (transects or points). If observations were made on line-transects, this
#' data frame must also have a column named
#' by the \code{lengthColumn} parameter that contains transect lengths. NA
#' length transects are accepted and are dropped when computing total
#' transect length. Only observations on non-NA-length transects count toward density.
#'
#' @param surveyedSides The number of sides of the transect that were surveyed. Either
#' 1 or 2. Only applies to line transects.
#'
#' @inheritParams abundEstim
#'
#' @inheritParams dfuncEstim
#'
#' @inherit abundEstim details
#'
#' @return A list containing the following components:
#'
#' \item{density}{Estimated density in the surveyed area.}
#'
#' \item{abundance}{Estimated abundance on the study area.}
#'
#' \item{n.groups}{The number of detections (not individuals, unless all group sizes = 1)
#' used to estimate density and abundance.}
#'
#' \item{n.seen}{The number of individuals (sum of group sizes) used to
#' estimate density and abundance.}
#'
#' \item{area}{Total area of inference. Study area size}
#'
#' \item{surveyedUnits}{Number of surveyed sites. This is total transect length
#' for line-transects and number of points for point-transects. This total transect
#' length does not include NA transects.}
#'
#' \item{surveyedSides}{Number of sides (1 or 2) of transects surveyed. Only relevant for line-transects.}
#'
#' \item{avg.group.size}{Average group size on non-NA-length transects.}
#'
#' \item{range.group.size}{Minimum and maximum observed group sizes
#' on non-NA-length transects.}
#'
#' \item{w}{Strip width. }
#'
#' \item{pDetection}{Probability of detection.}
#'
#' For line-transects that do not involve covariates, x$density
#' is x$n.seen / (x$surveyedSides * x$w * x$pDetection * x$surveyedUnits)
#'
#'
#'
#' @seealso \code{\link{dfuncEstim}}, \code{\link{abundEstim}}
#'
#' @keywords model
#'
#' @export
estimateN <- function(dfunc
                      , data
                      , area = NULL
                      , surveyedSides
                      , lengthColumn
                      , control = RdistanceControls()
                      ){

  # ---- Resolve study area size and its measurement units ----
  if( !inherits(area, "units") && control$requireUnits ){
    if( !is.null(area) ){
      # 'area' was supplied without units, but units are required: ERROR
      stop(paste("Study area measurement units are required."
               , "Assign units to area by attaching 'units' package then:\n"
               , "units(area) <- '<units of measurement>'."
               , "'m^2' (sq meters), 'ha' (hectares), 'km^2', and 'acre' are acceptable.\n"
               , "See units::valid_udunits()"))
    }
    # 'area' is NULL: report abundance in 1 square unit of the output units
    area <- units::set_units(1, dfunc$outputUnits, mode = "standard")^2
  } else if( control$requireUnits ){
    # 'area' has units and is not NULL (units cannot be assigned to NULL),
    # though it could be NA.  Convert area units to the square of
    # outputUnits, e.g., "hectare" -> "m^2".  If the units cannot be
    # converted, an error is thrown here.
    squareOutputUnits <- units::set_units(1, dfunc$outputUnits, mode = "standard")^2
    area <- units::set_units(area, squareOutputUnits, mode = "standard")
  }

  # ---- Check whether point survey "lengths" are present ----
  if( dfunc$pointSurvey ){
    if( !( lengthColumn %in% names(data)) ){
      # No point "length" column.  Use siteID as the length column; below we
      # count non-missing site IDs rather than summing lengths.
      lengthColumn <- "siteID"
    }
  }

  # ---- Identify usable observations ----
  # Usable = inside the strip (w.lo, w.hi), on a non-NA-length transect,
  # and with a non-missing distance.
  mt <- terms(dfunc$model.frame)
  distColumn <- all.vars(mt)[attr(mt, "response")]
  inStrip <- (dfunc$w.lo <= data[, distColumn]) &
             (data[, distColumn] <= dfunc$w.hi)
  onPosTransect <- !is.na(data[, lengthColumn])  # distance used in dfunc, but group size not counted
  distPresent <- !is.na(data[, distColumn])      # group size counted, but no distance
  obsInd <- inStrip & onPosTransect & distPresent

  # ---- Measurement units check on transect lengths (line transects only) ----
  if( !dfunc$pointSurvey ){
    if( !inherits(data[, lengthColumn], "units") && control$requireUnits ){
      stop(paste("Transect length units are required.\n"
               , "Assign units to transect lengths in site data frame with statements like:\n"
               , "siteData$length <- units::set_units(siteData$length, '<units>')\n"
               , "'m' (meters), 'ft' (feet), 'km' (kilometers), etc. are all acceptable.\n"
               , "See units::valid_udunits()"))
    } else if( control$requireUnits ){
      # Lengths have units; convert them to the units used during
      # estimation.  Input dfunc must have a $outputUnits component.
      data[, lengthColumn] <- units::set_units(data[, lengthColumn]
                                             , dfunc$outputUnits
                                             , mode = "standard")
    }
  }

  # ---- Estimate numerator of abundance ----
  # REMEMBER: component $detections of dfunc has been truncated to
  # (w.lo, w.hi); component $model.frame has NOT been truncated.
  # With covariates, esw is a vector of length n; without, a scalar.
  # For point surveys esw is EDR; for line surveys it is ESW.
  esw <- effectiveDistance(dfunc
                         , newdata = data[ obsInd, , drop = FALSE])
  w <- dfunc$w.hi - dfunc$w.lo

  if (dfunc$pointSurvey) {
    phat <- (esw / w)^2   # for points
  } else {
    phat <- esw / w       # for lines
  }

  # phat should be unitless; check to be sure and, if so, drop units.
  # Odd: sometimes phat has units "1", sometimes "m/m".  Either way,
  # test and remove.
  if( isUnitless(phat) ){
    phat <- units::drop_units(phat)
  }

  # Group sizes come from the model offset, if present; otherwise all 1.
  if( !is.null(attr(mt, "offset")) ){
    gsCol <- all.vars(mt)[attr(mt, "offset")]
    gs <- data[, gsCol]
    gs <- gs[ obsInd ]
  } else {
    gs <- rep(1, sum(obsInd))
  }

  nhat <- gs / phat  # Horvitz-Thompson inflated counts, one per detection

  # ---- Compute denominator of abundance, and abundance itself ----
  # Drop NA-length transects or points here; these may still carry
  # detections with distance measurements.  'data' is the merged data
  # frame with one row per detection, and there may be multiple
  # detections per transect or point, so de-duplicate on siteID before
  # counting points or summing lengths.
  dups <- duplicated(data$siteID)
  siteRows <- data[!dups, , drop = FALSE]

  if( dfunc$pointSurvey ){
    totSurveyedUnits <- sum(!is.na(siteRows[, lengthColumn]))
    dens <- sum(nhat) / (pi * w^2 * totSurveyedUnits)
  } else {
    totSurveyedUnits <- sum(siteRows[, lengthColumn], na.rm = TRUE)
    dens <- sum(nhat) / (surveyedSides * w * totSurveyedUnits)
  }

  abundance <- dens * area

  # Abundance should be unitless after density * area; if not, the
  # distance, length, and area units are incompatible.
  if( isUnitless(abundance) ){
    abundance <- units::drop_units(abundance)
  } else {
    warning(paste("Strange measurement units have been detected because abundance is not unitless.\n",
                  "Check measurement unit compatibility among distances, transect lengths, and area."))
  }

  nhat.df <- list(density = dens
                , abundance = abundance
                , n.groups = length(gs)
                , n.seen = sum(gs)
                , area = area
                , surveyedUnits = totSurveyedUnits
                , surveyedSides = surveyedSides
                , avg.group.size = mean(gs)
                , range.group.size = range(gs)
                , w = w
                , pDetection = phat
                )

  # Some interesting tidbits:
  #  sampled area = tot.trans.len * 2 * (dfunc$w.hi - dfunc$w.lo)
  #  sum(1/phat) = what Distance calls "N in covered region": the
  #    estimated number of groups in the sampled area.
  #  sum(1/phat)*mean(groupsize) = an estimate of individuals in the
  #    sampled area; probably how Distance would do it.
  #  sum(groupsize/phat) = the HT estimate of individuals in the sampled
  #    area; how Rdistance does it.  The two estimates are very close;
  #    the only difference is ratio-of-sums vs. sum-of-ratios.
  #  n.indivs / (nhat.groups*mean.grp.size) = n.groups / nhat.groups =
  #    what Distance calls "Average p"; this differs from mean(phat)
  #    the way Rdistance calculates it.

  return(nhat.df)
} # end estimateN function