#' @title dE.single - Estimate single-observer line-transect distance function
#'
#' @description Fits a detection function to off-transect
#' distances collected by a single observer.
#'
#' @inheritParams dfuncEstim
#'
#' @param formula A standard formula object (for example, \code{dist ~ 1} or
#' \code{dist ~ covar1 + covar2}). The left-hand side (before \code{~})
#' is the name of the vector containing off-transect or radial detection distances.
#' The right-hand side contains the names of covariate
#' vectors to fit in the detection
#' function, and potentially group sizes.
#' Covariates can be either detection level
#' or transect level and can appear in \code{data} or exist in the
#' global working environment. Regular R scoping
#' rules apply.
#'
#'
#' @param likelihood String specifying the likelihood to fit. Built-in
#' likelihoods at present are "halfnorm", "hazrate", and "negexp".
#'
#' @param w.lo Lower or left-truncation limit of the distances in distance data.
#' This is the minimum possible off-transect distance. Default is 0. If
#' \code{w.lo} is greater than 0, it must be assigned measurement units
#' using \code{units(w.lo) <- "<units>"} or
#' \code{w.lo <- units::set_units(w.lo, "<units>")}.
#' See examples in the help for \code{set_units}.
#'
#' @param w.hi Upper or right-truncation limit of the distances
#' in \code{dist}. This is the maximum off-transect distance that
#' could be observed. If unspecified (i.e., NULL),
#' right-truncation is set to the maximum of the observed
#' distances. If \code{w.hi} is specified, it must have associated
#' measurement units. Assign measurement units
#' using \code{units(w.hi) <- "<units>"} or
#' \code{w.hi <- units::set_units(w.hi, "<units>")}.
#' See examples in the help for \code{set_units}.
#'
#' @param expansions A scalar specifying the number of terms
#' in \code{series} to compute. Depending on the series,
#' this could be 0 through 5. The default of 0 equates
#' to no expansion terms of any type. No expansion terms
#' are allowed (i.e., \code{expansions} is forced to 0) if
#' covariates are present in the detection function
#' (i.e., right-hand side of \code{formula} includes
#' something other than \code{1}).
#'
#' @param series If \code{expansions} > 0, this string
#' specifies the type of expansion to use. Valid values at
#' present are 'simple', 'hermite', and 'cosine'.
#'
#' @param x.scl The x coordinate (a distance) at which the
#' detection function will be scaled. \code{x.scl} can be a distance
#' or the string "max".
#' When \code{x.scl} is specified (i.e., not 0 or "max"), it must have measurement
#' units assigned using either \code{library(units);units(x.scl) <- "<units>"}
#' or \code{x.scl <- units::set_units(x.scl, "<units>")}. See
#' \code{units::valid_udunits()} for valid symbolic units.
#'
#' @param g.x.scl Height of the distance function at coordinate \code{x.scl}.
#' The distance function
#' will be scaled so that g(\code{x.scl}) = \code{g.x.scl}.
#' If \code{g.x.scl} is not
#' a data frame, it must be a numeric value (vector of length 1)
#' between 0 and 1.
#'
#' @param warn A logical scalar specifying whether to issue
#' an R warning if the estimation did not converge or if one
#' or more parameter estimates are at their boundaries.
#' For estimation, \code{warn} should generally be left at
#' its default value of \code{TRUE}. When computing bootstrap
#' confidence intervals, setting \code{warn = FALSE}
#' turns off annoying warnings when an iteration does
#' not converge. Regardless of \code{warn}, after
#' completion all messages about
#' convergence and boundary conditions are printed
#' by \code{print.dfunc}, \code{print.abund}, and
#' \code{plot.dfunc}.
#'
#' @param outputUnits A string specifying the symbolic measurement
#' units for results. Valid units are listed in \code{units::valid_udunits()}.
#' The strings for common distance symbolic units are:
#' "m" - meters, "ft" - feet, "cm" - centimeters, "mm" -
#' millimeters, "mi" - miles, "nmile" -
#' nautical miles ("nm" is nano meters), "in" - inches,
#' "yd" - yards, "km" - kilometers, "fathom" - fathoms,
#' "chains" - chains, and "furlong" - furlongs.
#' If \code{outputUnits} is unspecified (NULL),
#' output units will be the same as those on
#' distances in \code{data}.
#'
#'
#' @section Group Sizes:
#' To specify non-unity group sizes, use \code{groupsize()}
#' on the RHS of \code{formula}. When group sizes are not all 1, they must appear in a column
#' of the 'detections' list-column of \code{data}.
#' For example, \code{d ~ habitat + groupsize(number)} specifies
#' distances in column \code{d}, one covariate
#' named \code{habitat}, and that column \code{number}
#' contains the number of individuals
#' associated with each detection. If group sizes are not specified,
#' all group sizes are assumed to be 1.
#'
#' @section Contrasts:
#' Factor contrasts in \code{Rdistance} are specified
#' the same way as in \code{lm} or \code{glm}.
#' By default, \code{Rdistance} uses
#' contrasts in \code{getOption("contrasts")}. To change contrasts, use a statement
#' like \code{options(contrasts = c(unordered = "contr.SAS",
#' ordered = "contr.poly"))}. Or, to set contrasts for a
#' specific factor in the input data frame, use
#' \code{contrasts(df$A) <- "contr.sum"} or similar.
#' See \code{\link{contrasts}} or the \code{contrasts.arg}
#' of \code{\link{model.matrix}}.
#'
#' @section Transect types:
#' \code{Rdistance} accommodates two kinds of transects: continuous and point.
#' Detections can occur at any point on continuous transects.
#' \code{Rdistance} calls these 'line-transects' even though routes are not
#' necessarily a straight line.
#' On point transects, detections occur at a series of stops
#' (points). \code{Rdistance} calls these point-transects. Transects are the basic
#' sampling unit in both cases. \code{Rdistance} assumes each row of \code{data}
#' contains information from one transect. See \code{\link{RdistDf}} for
#' more details.
#'
#' @section Measurement Units:
#' As of \code{Rdistance} version 3.0.0, measurement units are
#' required on all physical distances.
#' Requiring units ensures that internal calculations and results
#' (e.g., ESW and abundance) are correct
#' and that output units are clear.
#' Measurement units are required on
#' off-transect distances, radial distances, truncation distances
#' (\code{w.lo}, unless it is zero; and \code{w.hi}, unless it is NULL),
#' scale locations (\code{x.scl}, unless it is zero),
#' line-transect lengths, and study area size. All units are
#' 1-dimensional except those on study area, which are 2-dimensional.
#'
#' Physical measurement units can vary. For example,
#' off-transect distances can be meters ("m"), \code{w.hi} can be inches ("in"),
#' and \code{w.lo} can be kilometers ("km"). Internally, all distances are
#' converted to the units specified by \code{outputUnits}
#' (or the units of input distances if
#' \code{outputUnits} is NULL), and
#' all output is reported
#' in units of \code{outputUnits}. Valid conversions must exist between
#' units or an error is thrown. For example, meters cannot be converted
#' into hectares.
#'
#' Measurement units can be assigned using
#' \code{units()<-} after attaching the \code{units}
#' package or with \code{x <- units::set_units(x, "<units>")}.
#' See \code{units::valid_udunits()}
#' for a list of valid symbolic units.
#'
#' If measurements are truly unit-less, or measurement units are unknown,
#' set \code{options(Rdist_requireUnits = FALSE)}. This suppresses
#' all unit checks and conversions. Users are on their own
#' to make sure inputs are scaled correctly and that output units are known.
#'
#' @details
#' Optimization and estimation controls can be modified using \code{options()}.
#' See \code{\link{RdistanceControls}}.
#'
#' @return An object of class 'dfunc'. Objects of class 'dfunc'
#' are lists containing the following components:
#'
#' \item{par}{The vector of estimated parameter values.
#' Length of this vector for built-in likelihoods is one
#' (for the function's parameter) plus the
#' number of expansion terms plus one if the likelihood is
#' 'hazrate' (which has
#' two parameters). }
#'
#' \item{varcovar}{The variance-covariance matrix for coefficients
#' of the distance function, estimated by the inverse of the fit's Hessian
#' evaluated at the estimates. Rdistance estimates the
#' Hessian as the second derivative of the log likelihood surface
#' at the final estimates, where second derivatives are estimated by
#' numeric differentiation (see \code{\link{secondDeriv}}). There is no guarantee this
#' matrix is positive-definite, and it should be viewed with caution.
#' Error estimates derived from bootstrapping are generally
#' more reliable. I.e., re-compute coefficient confidence intervals
#' using the bootstrap values in component \code{$B} of an abundance object.}
#'
#' \item{loglik}{The maximized value of the log likelihood.}
#'
#' \item{convergence}{The convergence code. This code
#' is returned by \code{optim} or \code{nlminb}. Values other than 0 indicate suspect
#' convergence.}
#'
#' \item{likelihood}{The name of the likelihood. This is
#' the value of the argument \code{likelihood}. }
#'
#' \item{w.lo}{Left-truncation value used during the fit.}
#'
#' \item{w.hi}{Right-truncation value used during the fit.}
#'
#' \item{mf}{A modelframe of detections within the strip
#' or circle used in the fit. Column 'dist' contains the
#' observed distances.
#' Column 'offset(...)' contains group sizes associated with
#' the values of 'dist'. Group
#' sizes are only used in \code{abundEstim}. This model frame
#' contains only non-missing distances between \code{w.lo} and \code{w.hi}. }
#'
#' \item{model.frame}{A \code{model.frame} object containing observed distances
#' (the 'response'), covariates specified in the formula, and group sizes if they
#' were specified. If specified, the name of the group size column is "offset(-variable-)",
#' not "groupsize(-variable-)", because internally it is easier to treat group sizes
#' as an offset in the model. This component is a proper \code{model.frame} and contains
#' both 'terms' and 'contrasts' attributes. }
#'
#' \item{siteID.cols}{A vector containing the transect ID column names in \code{detectionData}
#' and \code{siteData}. Transect IDs can be a composite of two or more columns and hence
#' this component can have length greater than 1. }
#'
#' \item{expansions}{The number of expansion terms used
#' during estimation.}
#'
#' \item{series}{The type of expansion used during estimation.}
#'
#' \item{call}{The original call of this function.}
#'
#' \item{call.x.scl}{The \emph{input} or user requested
#' distance at which the distance function is scaled. }
#'
#' \item{call.g.x.scl}{The \emph{input} value specifying the
#' height of the distance function at a distance
#' of \code{call.x.scl}. }
#'
#' \item{call.observer}{The value of input parameter \code{observer}.
#' The input \code{observer} parameter is only applicable when
#' \code{g.x.scl} is a data frame.}
#'
#' \item{fit}{The fitted object returned by \code{optim}.
#' See documentation for \code{optim}.}
#'
#' \item{factor.names}{The names of any factors in \code{formula}. }
#'
#' \item{pointSurvey}{The input value of \code{pointSurvey}.
#' This is TRUE if distances are radial from a point. FALSE
#' if distances are perpendicular off-transect. }
#'
#' \item{formula}{The formula specified for the detection function.}
#'
#' \item{control}{A list containing values of the 'control' parameters
#' set by \code{RdistanceControls}.}
#'
#' \item{outputUnits}{The measurement units used for output. All
#' distance measurements are converted to these units internally. }
#'
#' \item{x.scl}{The \emph{actual} distance at which
#' the distance function is scaled to some value.
#' i.e., this is the actual \emph{x} at
#' which g(\emph{x}) = \code{g.x.scl}.
#' Note that \code{call.x.scl} = \code{x.scl} unless
#' \code{call.x.scl} == "max", in which case \code{x.scl} is the
#' distance at which \emph{g}() is maximized. }
#'
#' \item{g.x.scl}{The \emph{actual} height of the distance function
#' at a distance of \code{x.scl}. Note that \code{g.x.scl} =
#' \code{call.g.x.scl} unless \code{call.g.x.scl}
#' is a multiple observer data frame, in which case \code{g.x.scl} is the
#' actual height of the distance function at \code{x.scl} computed
#' from the multiple observer data frame. }
#'
#' @references Buckland, S.T., D.R. Anderson, K.P. Burnham, J.L. Laake, D.L. Borchers,
#' and L. Thomas. (2001) \emph{Introduction to distance sampling: estimating
#' abundance of biological populations}. Oxford University Press, Oxford, UK.
#'
#' @seealso \code{\link{abundEstim}}, \code{\link{autoDistSamp}}.
#' Likelihood-specific help files (e.g., \code{\link{halfnorm.like}}).
#'
#' @examples
#' # Load example sparrow data (line transect survey type)
#' data(sparrowDf)
#'
#' dfunc <- dfuncEstim(data = sparrowDf
#' , formula = dist ~ 1)
#' dfunc
#' plot(dfunc)
#'
#' @keywords model
#' @export
dE.single <- function( data
                     , formula
                     , likelihood = "halfnorm"
                     , w.lo = units::set_units(0,"m")
                     , w.hi = NULL
                     , expansions = 0
                     , series = "cosine"
                     , x.scl = w.lo
                     , g.x.scl = 1
                     , warn = TRUE
                     , outputUnits = NULL
                     ){

  # Retained for reference: disabled handling of the old "uniform" likelihood.
  # if ( likelihood == "uniform" ){
  #   .Deprecated(new = "logistic.like"
  #             , package = "Rdistance"
  #             , msg = paste("'uniform.like' is deprecated. Use 'logistic'.\n"
  #                         , "Switching to 'logistic' likelihood.")
  #             , old = "uniform.like")
  #   likelihood <- "logistic"
  # }

  # ---- Parse the formula and make a model list ----
  # All parameters go into parseModel() because they need to become
  # components of the output list, not just the formula.
  # All checking is done in parseModel(), including
  # the check of units (via checkUnits()).
  modelList <- Rdistance::parseModel(data = data
                                   , formula = formula
                                   , likelihood = likelihood
                                   , w.lo = w.lo
                                   , w.hi = w.hi
                                   , expansions = expansions
                                   , series = series
                                   , x.scl = x.scl
                                   , g.x.scl = g.x.scl
                                   , outputUnits = outputUnits
                                   )

  # Start values and limits; only the 'names' component is used directly
  # below (to label parameters in boundary messages).
  strt.lims <- Rdistance::startLimits(modelList)

  # ---- Perform optimization ----
  fit <- mlEstimates( ml = modelList
                    , strt.lims = strt.lims
                    )

  # ---- Assemble results ----
  # The returned object is the fit components followed by the model list.
  ans <- c(fit, modelList)
  class(ans) <- "dfunc"

  if ( ans$likelihood != "Gamma" ){
    # Not absolutely necessary: these could be estimated later in the
    # print and plot methods, but computing them here saves a little time.
    gx <- gxEstim(ans)
    ans$x.scl <- gx$x.scl
    ans$g.x.scl <- gx$g.x.scl
  } else {
    # Special case of Gamma: store the values exactly as they were input.
    ans$x.scl <- x.scl
    ans$g.x.scl <- g.x.scl
  }

  # ---- Check parameter boundaries ----
  fuzz <- getOption("Rdistance_fuzz")
  if ( is.null(fuzz) ){
    # An unset option would turn the comparisons below into zero-length
    # logicals and silently skip the boundary check; use exact comparison.
    fuzz <- 0
  }

  if (ans$convergence != 0) {
    # Optimizer reported a problem; pass its message along and skip
    # the boundary checks.
    if (warn) warning(ans$message)
    low.bound <- FALSE
    high.bound <- FALSE
  } else {
    low.bound <- ans$par <= (ans$limits$low + fuzz)
    high.bound <- ans$par >= (ans$limits$high - fuzz)
  }

  messL <- NULL
  if (any(low.bound)) {
    # If we are here, the model converged, but to a limit.
    ans$convergence <- -1
    messL <- paste(paste(strt.lims$names[low.bound], "parameter at lower boundary.")
                 , collapse = "; ")
    ans$message <- messL
    if (warn) warning(messL)
  }

  if (any(high.bound)) {
    ans$convergence <- -1
    messH <- paste(paste(strt.lims$names[high.bound], "parameter at upper boundary.")
                 , collapse = "; ")
    # The stored message keeps both boundary notes (lower first, then upper).
    ans$message <- c(messL, messH)
    # Warn about the upper boundary only: any lower-boundary warning was
    # already issued above, and warning() would paste a two-element
    # vector together with no separator.
    if (warn) warning(messH)
  }

  ans

} # end function
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.