#' @title Estimate a detection function from distance-sampling data
#'
#' @description Fit a specific detection function to off-transect
#' or off-point (radial) distances using maximum likelihood.
#' Distance functions are fitted to individual
#' distance observations, not to histogram bin heights, even though
#' the plot methods draw histogram bars.
#'
#' @param formula A standard formula object (e.g., \code{dist ~ 1},
#' \code{dist ~ covar1 + covar2}). The left-hand side (before \code{~})
#' is the name of the vector containing distances (off-transect or
#' radial). The right-hand side (after \code{~})
#' contains the names of covariate vectors to fit in the detection
#' function. Covariates can be either detection level, in which case
#' they appear in \code{detectionData}, or site (transect) level, in which
#' case they appear in \code{siteData}. Regular R scoping
#' rules apply.
#'
#' \bold{Group Sizes:} Non-unity group sizes are specified using \code{groupsize()}
#' in the formula. That is, when group sizes are not all 1, they must
#' be entered as a column in \code{detectionData} and specified
#' using \code{groupsize()} as part of \code{formula}. For example,
#' \code{d ~ habitat + groupsize(number)} specifies that
#' distances appear in variable \code{d}, one covariate
#' named \code{habitat} is to be fitted, and column \code{number}
#' contains the number of individuals
#' associated with each detection. If group sizes are not specified,
#' all group sizes are assumed to be 1.
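#'
#' For illustration, a call using hypothetical data frames \code{dets}
#' (detections) and \code{sites} (site covariates) might look like
#' \code{dfuncEstim(d ~ habitat + groupsize(number), detectionData = dets,
#' siteData = sites)}; the data frame and column names here are
#' placeholders, not requirements.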
#'
#'
#' @param detectionData A data frame containing detection distances
#' (either perpendicular for line-transect or radial for point-transect
#' designs), with one row per detected object or group.
#' This data frame must contain at least the following
#' information:
#' \itemize{
#' \item Detection Distances: A single column containing
#' detection distances must be specified on the left-hand
#' side of \code{formula}. As of Rdistance version 3.0.0,
#' the detection distances must have measurement units attached.
#' Attach measurement units to distances using \code{library(units);units()<-}.
#' For example, \code{library(units)} followed by \code{units(df$dist) <- "m"} or
#' \code{units(df$dist) <- "ft"} will work. Alternatively,
#' \code{df$dist <- units::set_units(df$dist, "m")} also works.
#'
#' \item Site IDs: The ID of the transect or point
#' (i.e., the 'site') where each object or group was detected.
#' The site ID column(s) (see arguments \code{transectID} and
#' \code{pointID}) must
#' specify the site (transect or point) so that this
#' data frame can be merged with \code{siteData}.
#'
#' \item In a later release, \code{Rdistance} will allow detection-level
#' covariates. When that happens, detection-level
#' covariates will appear in this data frame.
#'
#' }
#' See example data set \code{\link{sparrowDetectionData}}.
#' See also \bold{Input data frames} below
#' for information on when \code{detectionData} and
#' \code{siteData} are required inputs.
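#'
#' As a minimal sketch (illustrative values; 'siteID' is the default
#' site ID column name), \code{dets <- data.frame(siteID =
#' c("T1","T1","T2"), dist = c(10, 25, 40))} followed by
#' \code{units(dets$dist) <- "m"} builds a detection data frame
#' with measurement units attached.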
#'
#' @param siteData A data.frame containing site (transect or point)
#' IDs and any
#' \emph{site level} covariates to include in the detection function.
#' Every unique surveyed site (transect or point) is represented on
#' one row of this data set, whether or not targets were sighted
#' at the site. See arguments \code{transectID} and
#' \code{pointID} for an explanation of the way in which distance and site
#' data frames are merged. See
#' section \bold{Relationship between data frames (transect and point ID's)}
#' for additional details.
#'
#' See \bold{Data frame requirements} for situations in which
#' \code{detectionData} only, \code{detectionData} and \code{siteData}, or
#' neither are required.
#'
#'
#' @param likelihood String specifying the likelihood to fit. Built-in
#' likelihoods at present are "uniform", "halfnorm",
#' "hazrate", "negexp", and "Gamma". See vignette for a way to use
#' user-defined likelihoods.
#'
#' @param pointSurvey A logical scalar specifying whether input data come
#' from point-transect surveys (TRUE),
#' or line-transect surveys (FALSE).
#'
#' @param w.lo Lower or left-truncation limit of the distances in distance data.
#' This is the minimum possible off-transect distance. Default is 0. If
#' \code{w.lo} is greater than 0, it must be assigned measurement units
#' using \code{units(w.lo) <- "<units>"} or
#' \code{w.lo <- units::set_units(w.lo, "<units>")}.
#' See examples in the help for \code{set_units}.
#'
#' @param w.hi Upper or right-truncation limit of the distances
#' in \code{dist}. This is the maximum off-transect distance that
#' could be observed. If unspecified (i.e., NULL),
#' right-truncation is set to the maximum of the observed
#' distances. If \code{w.hi} is specified, it must have associated
#' measurement units. Assign measurement units
#' using \code{units(w.hi) <- "<units>"} or
#' \code{w.hi <- units::set_units(w.hi, "<units>")}.
#' See examples in the help for \code{set_units}.
#'
#' @param expansions A scalar specifying the number of terms
#' in \code{series} to compute. Depending on the series,
#' this could be 0 through 5. The default of 0 equates
#' to no expansion terms of any type. No expansion terms
#' are allowed (i.e., \code{expansions} is forced to 0) if
#' covariates are present in the detection function
#' (i.e., right-hand side of \code{formula} includes
#' something other than \code{1}).
#'
#' @param series If \code{expansions} > 0, this string
#' specifies the type of expansion to use. Valid values at
#' present are 'simple', 'hermite', and 'cosine'.
#'
#' @inheritParams F.gx.estim
#'
#' @param warn A logical scalar specifying whether to issue
#' an R warning if the estimation did not converge or if one
#' or more parameter estimates are at their boundaries.
#' For estimation, \code{warn} should generally be left at
#' its default value of \code{TRUE}. When computing bootstrap
#' confidence intervals, setting \code{warn = FALSE}
#' turns off annoying warnings when an iteration does
#' not converge. Regardless of \code{warn}, after
#' completion all messages about
#' convergence and boundary conditions are printed
#' by \code{print.dfunc}, \code{print.abund}, and
#' \code{plot.dfunc}.
#'
#' @param transectID A character vector naming the transect ID column(s) in
#' \code{detectionData} and \code{siteData}. If transects are
#' not identified in columns named 'siteID' (the default for both data frames), you need
#' to specify which column(s) uniquely identify transects. \code{transectID} can have length
#' greater than 1, in which case unique transects are identified by the composite columns.
#'
#' @param pointID When point-transects are used, this is the
#' ID of points on a transect. When \code{pointSurvey}=TRUE,
#' the combination of \code{transectID} and
#' \code{pointID} specify unique sampling sites.
#' See \bold{Input data frames}.
#'
#' If single points are surveyed,
#' meaning surveyed points were not grouped into transects, each 'transect' consists
#' of one point. In this case, set \code{transectID} equal to
#' the point's ID and set \code{pointID} equal to 1 for all points.
#'
#' @param control A list containing optimization control parameters such
#' as the maximum number of iterations, tolerance, the optimizer to use,
#' etc. See the
#' \code{\link{RdistanceControls}} function for explanation of each value,
#' the defaults, and the requirements for this list.
#' See examples below for how to change controls.
#'
#' @param outputUnits A string giving the symbolic measurement
#' units that results should be reported in. Any
#' distance measurement unit in \code{units::valid_udunits()}
#' will work. The strings for common distance symbolic units are:
#' "m" for meters, "ft" for feet, "cm" for centimeters, "mm" for
#' millimeters, "mi" for miles, "nmile" for
#' nautical miles ("nm" is nano meters), "in" for inches,
#' "yd" for yards, "km" for kilometers, "fathom" for fathoms,
#' "chains" for chains, and "furlong" for furlongs.
#' If \code{outputUnits} is unspecified (NULL),
#' output units are the same as distance measurements units in
#' \code{data}.
#'
#' @section Transect types:
#' \code{Rdistance}
#' accommodates two kinds of transects: continuous and point.
#' On continuous transects (line-transects), detections can occur
#' anywhere along the route.
#' On point transects (point-transects), detections can only
#' occur at a series of stops (points).
#' Transects are the basic sampling unit in both cases.
#' Columns named in \code{transectID} are
#' sufficient to specify unique line-transects.
#' The combination of \code{transectID} and
#' \code{pointID} specify unique sampling locations along point-transects.
#' See \bold{Input data frames} below for more detail.
#'
#' @section Input data frames:
#' To save space and to easily specify
#' sites without detections,
#' all site IDs (regardless of whether a detection occurred there)
#' and all \emph{site level} covariates are stored in
#' the \code{siteData} data frame. Detection distances and group
#' sizes are measured at the \emph{detection level} and
#' are stored in the
#' \code{detectionData} data frame.
#'
#' \subsection{Data frame requirements}{The following explains
#' conditions under which various combinations of the input data frames
#' are required.
#'
#' \enumerate{
#' \item \bold{Detection data and site data both required:}\cr
#' Both \code{detectionData} and \code{siteData}
#' are required if \emph{site level} covariates are
#' specified on the right-hand side of \code{formula}.
#' \emph{Detection level} covariates are not currently allowed.
#' Both \code{detectionData} and
#' \code{siteData} data frames are required to estimate abundance
#' later in \code{abundEstim}.
#'
#' \item \bold{Detection data only required:}\cr
#' Only \code{detectionData} is required when
#' covariates are not included in the distance function (i.e., the right-hand side of
#' \code{formula} is "~1" or "~groupsize(groupSize)"). Note that \code{dfuncEstim}
#' does not need to know transect IDs (or group sizes)
#' in order to estimate a distance function; but, group sizes and
#' transect IDs are stored and used to estimate abundance
#' in function \code{abundEstim}. Both the \code{detectionData} and
#' \code{siteData} data frames are required in \code{abundEstim}.
#'
#' \item \bold{Neither detection data nor site data required}\cr
#' Neither \code{detectionData} nor \code{siteData}
#' are required if all variables specified in \code{formula}
#' are within the scope of \code{dfuncEstim} (e.g., in the global working
#' environment) and abundance estimates are not required.
#' Regular R scoping rules apply when the call
#' to \code{dfuncEstim} is embedded in a function.
#' This case will produce distance functions only (see the sketch
#' following this list).
#' Abundance cannot later be estimated because transects and transect lengths cannot
#' be specified outside of a data frame. If abundance will be estimated,
#' use either case 1 or 2.
#' }
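#'
#' A minimal sketch of case 3, fitting a distance function to a bare
#' distance vector in the workspace (simulated, illustrative values;
#' no abundance estimate is possible afterward):
#' \preformatted{
#' library(units)
#' d <- set_units(abs(rnorm(100, mean = 0, sd = 20)), "m")
#' dfunc <- dfuncEstim(d ~ 1, likelihood = "halfnorm")
#' }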
#'
#' }
#'
#' \subsection{Relationship between data frames (transect and point ID's)}{
#' The input data frames, \code{detectionData} and \code{siteData},
#' must be merge-able on unique sites. For line-transects,
#' site ID's specify transects or routes and are unique values of
#' the \code{transectID} column in \code{siteData}. In this case,
#' the following merge must work:
#' \code{merge(detectionData,siteData,by=transectID)}.
#'
#' For point-transects,
#' site ID's specify individual points and are unique values
#' of the combination \code{paste(transectID,pointID)}.
#' In this case, the following merge must work:
#' \code{merge(detectionData,siteData,by=c(transectID, pointID))}.
#'
#' By default, \emph{transects} are unique combinations of the
#' common variables in the \code{detectionData} and \code{siteData} data frames
#' if both data frames are specified (i.e., unique values of
#' \code{intersect(names(detectionData), names(siteData))}). If \code{siteData}
#' is not specified and \code{transectID} is not given, transects are assumed to
#' be identified in a column named \code{siteID} in \code{detectionData}.
#'
#' Either way
#' (i.e., whether \code{transectID} is the default "siteID" or something else),
#' the column(s) containing transect IDs must be correct here if abundance is to be
#' estimated later. Routine \code{\link{abundEstim}} requires transect IDs for bootstrapping
#' because it resamples unique values of the composite transect ID column(s). \code{abundEstim}
#' uses the value of \code{transectID} specified here, so users cannot change transect IDs between
#' calls to \code{dfuncEstim} and \code{abundEstim}, and all \code{transectID} columns
#' must be present in both data frames even though they may not be used until later.
#'
#' An error occurs if both \code{detectionData} and \code{siteData} are specified
#' but no common columns exist. Duplicate \code{transectID} values are not allowed in \code{siteData}
#' but are allowed in \code{detectionData} because multiple detections can occur on a single transect
#' or at a single site. If the same site is surveyed in multiple years, specify another level of transect ID;
#' for example, \code{transectID} = \code{c("year","transectID")}.
#'
#' }
#'
#'
#'
#' @section Measurement Units:
#' As of \code{Rdistance} version 3.0.0, measurement units are
#' required on all distances. This includes off-transect
#' distances, radial
#' distances, truncation distances (\code{w.lo} and \code{w.hi}),
#' transect lengths, and the study area size.
#' In \code{dfuncEstim}, units are required on the following:
#' \code{detectionData$dist}; \code{w.lo} (unless it is zero);
#' \code{w.hi} (unless it is NULL);
#' and \code{x.scl}. In \code{abundEstim}, units are
#' required on \code{siteData$length} and \code{area}. All units are
#' 1-dimensional except those on \code{area}, which are 2-dimensional.
#'
#' Requiring units ensures that internal calculations and results
#' (e.g., ESW and abundance) are correct
#' and that output units are clear.
#' Input distances can have variable units. For example,
#' input distances can be specified in "m", \code{w.hi} in "in",
#' and \code{w.lo} in "km". Internally, all distances are
#' converted to the units specified by \code{outputUnits}
#' (or the units of input distances if
#' \code{outputUnits} is NULL), and
#' all output is reported
#' in units of \code{outputUnits}.
#'
#' Measurement units can be assigned using
#' \code{units()<-} after attaching the \code{units}
#' package or with \code{x <- units::set_units(x, "<units>")}.
#' See \code{units::valid_udunits()}
#' for a list of valid symbolic units.
#'
#' If measurements are truly unit-less, or measurement units are unknown,
#' set \code{RdistanceControls(requireUnits = FALSE)}. This suppresses
#' all unit checks and conversions. Users are on their own
#' to make sure inputs are scaled correctly and that output units are known.
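#'
#' The following sketch combines these options; the data frame
#' \code{df} and its \code{dist} column are hypothetical:
#' \preformatted{
#' library(units)
#' units(df$dist) <- "ft"                # attach units to distances
#' dfunc <- dfuncEstim(dist ~ 1
#'                   , detectionData = df
#'                   , w.hi = set_units(150, "m")  # truncation in different units
#'                   , outputUnits = "m")          # report all results in meters
#'
#' # If distances are truly unit-less (not recommended):
#' # dfuncEstim(dist ~ 1, detectionData = df,
#' #            control = RdistanceControls(requireUnits = FALSE))
#' }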
#'
#' @return An object of class 'dfunc'. Objects of class 'dfunc'
#' are lists containing the following components:
#' \item{parameters}{The vector of estimated parameter values.
#' Length of this vector for built-in likelihoods is one
#' (for the function's parameter), plus the
#' number of expansion terms, plus one more if the likelihood is
#' either 'hazrate' or 'uniform' (these likelihoods have
#' two parameters). }
#' \item{varcovar}{The variance-covariance matrix for coefficients
#' of the distance function, estimated by the inverse of the Hessian
#' of the fit evaluated at the estimates. There is no guarantee this
#' matrix is positive-definite, so it should be viewed with caution.
#' Error estimates derived from bootstrapping are generally
#' more reliable.}
#' \item{loglik}{The maximized value of the log likelihood
#' (more specifically, the minimized value of the negative
#' log likelihood).}
#' \item{convergence}{The convergence code. This code
#' is returned by \code{optim}. Values other than 0 indicate suspect
#' convergence.}
#'
#' \item{like.form}{The name of the likelihood. This is
#' the value of the argument \code{likelihood}. }
#'
#' \item{w.lo}{Left-truncation value used during the fit.}
#'
#' \item{w.hi}{Right-truncation value used during the fit.}
#'
#' \item{detections}{A data frame of detections within the strip
#' or circle used in the fit. Column 'dist' contains the
#' observed distances.
#' Column 'groupSize' contains group sizes associated with
#' the values of 'dist'. Group
#' sizes are only used in \code{abundEstim}. This data frame
#' contains only distances between \code{w.lo} and \code{w.hi}.
#' Another component of the returned object, \code{model.frame},
#' contains all observations in the input data, including those outside the strip.}
#'
#' \item{covars}{Either NULL if no covariates are included in the
#' detection function, or a \code{model.matrix} containing the covariates
#' used in the fit. Factors in the model matrix have been expanded
#' into 0-1 indicator variables based on R contrasts in effect at the time
#' of the call. Only covariates associated with distances inside the strip
#' or circle are included. }
#'
#' \item{model.frame}{A \code{model.frame} object containing observed distances
#' (the 'response'), covariates specified in the formula, and group sizes if they
#' were specified. If specified, the name of the group size column is "offset(-variable-)",
#' not "groupsize(-variable-)", because internally it is easier to treat group sizes
#' as an offset in the model. This component is a proper \code{model.frame} and contains
#' both 'terms' and 'contrasts' attributes. }
#'
#' \item{siteID.cols}{A vector containing the transect ID column names in \code{detectionData}
#' and \code{siteData}. Transect IDs can be a composite of two or more columns and hence
#' this component can have length greater than 1. }
#'
#' \item{expansions}{The number of expansion terms used
#' during estimation.}
#'
#' \item{series}{The type of expansion used during estimation.}
#'
#' \item{call}{The original call of this function.}
#'
#' \item{call.x.scl}{The \emph{input} or user requested
#' distance at which the distance function is scaled. }
#'
#' \item{call.g.x.scl}{The \emph{input} value specifying the
#' height of the distance function at a distance
#' of \code{call.x.scl}. }
#'
#' \item{call.observer}{The value of input parameter \code{observer}.
#' The input \code{observer} parameter is only applicable when
#' \code{g.x.scl} is a data frame.}
#'
#' \item{fit}{The fitted object returned by \code{optim}.
#' See documentation for \code{optim}.}
#'
#' \item{factor.names}{The names of any factors in \code{formula}. }
#'
#' \item{pointSurvey}{The input value of \code{pointSurvey}.
#' This is TRUE if distances are radial from a point. FALSE
#' if distances are perpendicular off-transect. }
#'
#' \item{formula}{The formula specified for the detection function.}
#'
#' \item{control}{A list containing values of the 'control' parameters
#' set by \code{RdistanceControls}.}
#'
#' \item{outputUnits}{The measurement units used for output. All
#' distance measurements are converted to these units internally. }
#'
#' \item{x.scl}{The \emph{actual} distance at which
#' the distance function is scaled to some value,
#' i.e., this is the actual \emph{x} at
#' which g(\emph{x}) = \code{g.x.scl}.
#' Note that \code{call.x.scl} = \code{x.scl} unless
#' \code{call.x.scl} == "max", in which case \code{x.scl} is the
#' distance at which \emph{g}() is maximized. }
#'
#' \item{g.x.scl}{The \emph{actual} height of the distance function
#' at a distance of \code{x.scl}. Note that \code{g.x.scl} =
#' \code{call.g.x.scl} unless \code{call.g.x.scl}
#' is a multiple observer data frame, in which case \code{g.x.scl} is the
#' actual height of the distance function at \code{x.scl} computed
#' from the multiple observer data frame. }
#'
#' @references Buckland, S.T., D.R. Anderson, K.P. Burnham, J.L. Laake, D.L. Borchers,
#' and L. Thomas. (2001) \emph{Introduction to distance sampling: estimating
#' abundance of biological populations}. Oxford University Press, Oxford, UK.
#'
#' @seealso \code{\link{abundEstim}}, \code{\link{autoDistSamp}}.
#' Likelihood-specific help files (e.g., \code{\link{halfnorm.like}}).
#' See package vignettes for additional options.
#'
#' @examples
#' # Load example sparrow data (line transect survey type)
#' data(sparrowDetectionData)
#'
#' dfunc <- dfuncEstim(formula = dist ~ 1
#'                   , detectionData = sparrowDetectionData)
#' dfunc
#' plot(dfunc)
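#'
#' # Additional sketches (not run). These assume the 'sparrowSiteData'
#' # example data set shipped with Rdistance; adjust names to your data.
#' \dontrun{
#' data(sparrowSiteData)
#'
#' # Hazard-rate likelihood, right-truncated at 100 m, results in meters
#' dfuncHaz <- dfuncEstim(formula = dist ~ 1
#'                      , detectionData = sparrowDetectionData
#'                      , siteData = sparrowSiteData
#'                      , likelihood = "hazrate"
#'                      , w.hi = units::set_units(100, "m")
#'                      , outputUnits = "m")
#'
#' # Changing optimization controls (assumes RdistanceControls accepts
#' # 'maxIters' and 'optimizer' arguments; see help("RdistanceControls"))
#' dfuncCtl <- dfuncEstim(formula = dist ~ 1
#'                      , detectionData = sparrowDetectionData
#'                      , control = RdistanceControls(maxIters = 2000
#'                                                  , optimizer = "nlminb"))
#' }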
#'
#' @keywords model
#' @export
#' @importFrom stats nlminb model.response is.empty.model
#' @importFrom stats model.matrix contrasts optim
#' @import units
dfuncEstim <- function (formula
, detectionData
, siteData
, likelihood = "halfnorm"
, pointSurvey = FALSE
, w.lo = units::set_units(0,"m")
, w.hi = NULL
, expansions = 0
, series = "cosine"
, x.scl = units::set_units(0,"m")
, g.x.scl = 1
, observer = "both"
, warn = TRUE
, transectID = NULL
, pointID = "point"
, outputUnits = NULL
, control = RdistanceControls()){
# To-do: remove control = RdistanceControls. Put all options in
# 'options(<name> = <value>)' in onAttach function.
cl <- match.call()
if(!missing(detectionData) & !missing(siteData)){
if( is.null(transectID) ){
transectID <- intersect( names(detectionData), names(siteData) )
if( pointSurvey ){
pointID <- NULL # multiple pts per transect not allowed when no transectID specified
}
}
if( pointSurvey ){
siteID.cols <- c(transectID, pointID)
} else {
siteID.cols <- c(transectID)
}
data <- merge(detectionData, siteData, by=siteID.cols) # if not all siteID.cols present, this fails
} else if(!missing(detectionData)){
data <- detectionData
if( is.null(transectID) ){
transectID <- "siteID"
if( pointSurvey ){
pointID <- NULL
}
}
if( pointSurvey ){
siteID.cols <- c(transectID, pointID)
} else {
siteID.cols <- c(transectID)
}
# Check for presence of transectID columns in detectionData
if( !all( siteID.cols %in% names(detectionData)) ){
stop(paste0("Valid site ID columns must be specified."
, " The following ID column(s) were not found in "
, deparse(substitute(detectionData))
, ": "
, paste(siteID.cols[!(siteID.cols %in% names(detectionData))], collapse = ", ")
))
}
} else{
data <- NULL
siteID.cols <- NULL
}
if( likelihood == "uniform" ){
.Deprecated(new = "logistic.like"
, package = "Rdistance"
, msg = paste("'unform.like' been re-named and is deprecated.\n"
, "Using likelihood = 'logistic' instead.")
, old = "uniform.like")
likelihood <- "logistic"
}
# (jdc) The double-observer method hasn't been checked since updating to version 2.x
# It's likely that an error would be raised if a user did try the double-observer option,
# but I'm adding a warning here, just in case
if(observer != "both") {
stop("The double-observer routines have not been tested in Rdistance
versions >2.x, so they have been disabled for the time being.
Contact the Rdistance authors if you need double observer analyses
and can help.")
}
# Much easier to convert "groupsize" to "offset" in formula because
# model.frame and others treat offset correctly.
formulaChar <- as.character(formula) # [1] = "~"; [2] = LHS; [3] = RHS
formulaChar[3] <- gsub( "groupsize\\(", "offset(", formulaChar[3] )
formula <- formula( paste(formulaChar[c(2,1,3)], collapse = " ") )
mf <- getDfuncModelFrame(formula, data)
mt <- terms(mf)
if( attr(mt, "response") == 0 ){
stop("Formula must have a response on LHS of '~'.")
}
dist <- model.response(mf,"any")
if( !is.null(attr(mt, "offset")) ){
# groupsize specified in formula
groupSize <- stats::model.offset(mf)
} else {
groupSize <- rep(1, length(dist))
}
covars <- if (!is.empty.model(mt)){
model.matrix(object = mt,
data = mf,
contrasts.arg = control$contrasts )
}
contr <- attr(covars,"contrasts")
assgn <- attr(covars,"assign")
# Check for measurement units
if( !inherits(dist, "units") & control$requireUnits ){
dfName <- deparse(substitute(detectionData))
respName <- as.character(attr(mt, "variables"))[attr(mt, "response") + 1]
stop(paste("Distance measurement units are required.",
"Assign units by attaching 'units' package then:\n",
paste0("units(",dfName,"$", respName, ")"), "<- '<units of measurment>',\n",
"for example 'm' (meters) or 'ft' (feet). See units::valid_udunits()"))
} else if( control$requireUnits ){
# if we are here, dist has units
# set units for output by converting dist units; w.lo, w.hi, and x.scl will all be converted later
if( !is.null(outputUnits) ){
dist <- units::set_units(dist, outputUnits, mode = "standard")
}
outUnits <- units(dist)
}
if( !inherits(w.lo, "units") & control$requireUnits ){
if( w.lo[1] != 0 ){
stop(paste("Units of minimum distance are required.",
"Assign units by attaching 'units' package then:\n",
"units(w.lo) <- '<units>' or",
paste0("units::as_units(", w.lo,", <units>) in function call\n"),
"See units::valid_udunits() for valid symbolic units."))
}
# if we are here, w.lo is 0, it has no units, and we require units
w.lo <- units::set_units(w.lo, outUnits, mode = "standard") # assign units to 0
} else if( control$requireUnits ){
# if we are here, w.lo has units and we require units, convert to the output units
w.lo <- units::set_units(w.lo, outUnits, mode = "standard")
}
if(is.null(w.hi)){
w.hi <- max(dist, na.rm=TRUE) # units flow through max automatically
} else if( !inherits(w.hi, "units") & control$requireUnits ){
stop(paste("Max distance measurement units are required.",
"Assign units by attaching 'units' package then:\n",
"units(w.hi) <- '<units>' or",
paste0("units::as_units(", w.hi,", <units>) in function call\n"),
"See units::valid_udunits() for valid symbolic units."))
} else if( control$requireUnits ){
# if we are here, w.hi has units and we require them, convert to output units
w.hi <- units::set_units(w.hi, outUnits, mode = "standard")
}
# Units on x.scl are enforced in F.gx.estim
ncovars <- ncol(covars)
# Truncate for w.lo and w.hi
ind <- (w.lo <= dist) & (dist <= w.hi)
dist <- dist[ind]
groupSize <- groupSize[ind]
covars <- covars[ind,,drop=FALSE] # covars loses "extra" attributes here
attr(covars,"assign") <- assgn
attr(covars,"contrasts") <- contr
# Eventually, I'd like to use a constant
# column for covars to allow intercept only
# models. That is, report a beta coefficient
# for ~1 models, and allow expansions in this case.
# For now, however, we'll just do the "old" method
# of estimating the distance function parameter directly.
factor.names <- NULL
if(ncovars==1){
if( assgn==0 ){
# constant; intercept only model
covars <- NULL
}
}
# Minimum number of spline basis functions
if(expansions < 2 & series == "bspline"){
expansions <- 2
if(warn) warning("Minimum spline expansion terms = 2. Have used 2.")
}
# Override x.scl for Gamma likelihood
if( !is.character(x.scl) ){
if( inherits(x.scl, "units") ){ # this if needed cause drop units does not work on plain vector
isZero <- units::drop_units(x.scl) == 0
} else {
isZero <- x.scl == 0
}
if( isZero & likelihood == "Gamma" ){
x.scl <- "max"
# warning("Cannot specify g(0) for Gamma likelihood. x.scl changed to 'max'.")
}
}
# Find which columns are factors.
# This works when covars is NULL and must be called
# even when ncovars == 1 to cover case like dist ~ -1+x (no intercept)
for(i in 1:ncol(mf)){
if(is.factor(mf[,i])){
factor.names <- c(factor.names, names(mf)[i])
}
}
vnames<-dimnames(covars)[[2]]
# Stop and print error if dist vector contains NAs
if(any(is.na(dist))) stop("Please remove detections for which dist is NA.")
strt.lims <- F.start.limits(likelihood
, expansions
, w.lo
, w.hi
, dist
, covars
, pointSurvey)
# Perform optimization
if(control$optimizer == "optim"){
fit <- optim(strt.lims$start,
F.nLL,
lower = units::drop_units(strt.lims$lowlimit), # safe
upper = units::drop_units(strt.lims$uplimit),
hessian = TRUE,
control = list(trace = 0,
maxit = control$maxIters,
factr = control$likeTol,
pgtol = control$likeTol),
method = c("L-BFGS-B"),
dist = dist,
like = likelihood,
covars = covars,
w.lo = w.lo,
w.hi = w.hi,
expansions = expansions,
series = series,
pointSurvey = pointSurvey,
for.optim = T)
} else if(control$optimizer == "nlminb"){
fit <- nlminb(start = strt.lims$start
, objective = F.nLL
, lower = strt.lims$lowlimit
, upper = strt.lims$uplimit
, control = list(trace = 0
, eval.max = control$evalMax
, iter.max = control$maxIters
, rel.tol = control$likeTol
, x.tol = control$coefTol
)
, dist = dist
, like = likelihood
, covars = covars
, w.lo = w.lo
, w.hi = w.hi
, expansions = expansions
, series = series
, pointSurvey = pointSurvey
, for.optim = T
)
names(fit)[names(fit)=="evaluations"]<-"counts"
fit$hessian <- secondDeriv(fit$par,
F.nLL,
eps=control$hessEps,
dist = dist,
like = likelihood,
covars = covars,
w.lo = w.lo,
w.hi = w.hi,
expansions = expansions,
series = series,
pointSurvey = pointSurvey,
for.optim = T
)
} else {
stop("Unknown optimizer function in control object")
}
if(!any(is.na(fit$hessian)) & !any(is.infinite(fit$hessian))){
qrh <- qr(fit$hessian)
if (qrh$rank < nrow(fit$hessian)) {
if (warn) warning("Singular variance-covariance matrix.")
varcovar <- matrix(NaN, nrow(fit$hessian), ncol(fit$hessian))
} else {
varcovar <- tryCatch(solve(fit$hessian), error=function(e){NaN})
if(length(varcovar) == 1 && is.nan(varcovar)){
if (warn) warning("Singular variance-covariance matrix.")
varcovar <- matrix(NaN, nrow(fit$hessian), ncol(fit$hessian))
}
}
} else {
if (warn) warning("fit did not converge, or converged to (Inf,-Inf)")
varcovar <- matrix(NaN, nrow(fit$hessian), ncol(fit$hessian))
}
names(fit$par) <- strt.lims$names
ans <- list(parameters = fit$par,
varcovar = varcovar,
loglik = fit$value,
convergence = fit$convergence,
like.form = likelihood,
w.lo = w.lo,
w.hi = w.hi,
detections = data.frame(dist, groupSize),
covars = covars,
model.frame = mf,
siteID.cols = siteID.cols,
expansions = expansions,
series = series,
call = cl,
call.x.scl = x.scl,
call.g.x.scl = g.x.scl,
call.observer = observer,
fit = fit,
factor.names = factor.names,
pointSurvey = pointSurvey,
formula = formula,
control = control,
outputUnits = outUnits)
ans$loglik <- F.nLL(ans$parameters
, ans$detections$dist
, covars = ans$covars
, like = ans$like.form
, w.lo = ans$w.lo
, w.hi = ans$w.hi
, series = ans$series
, expansions = ans$expansions
, pointSurvey = ans$pointSurvey
, for.optim = F)
# Assemble results
class(ans) <- "dfunc"
if( ans$like.form != "Gamma" ){
# not absolutely necessary. Could estimate these later in print and plot methods.
# but this saves a little time.
gx <- F.gx.estim(ans)
ans$x.scl <- gx$x.scl
ans$g.x.scl <- gx$g.x.scl
} else {
# Special case of Gamma
ans$x.scl <- x.scl
ans$g.x.scl <- g.x.scl
}
# ---- Check parameter boundaries ----
fuzz <- 1e-06
low.bound <- fit$par <= (strt.lims$lowlimit + fuzz)
high.bound <- fit$par >= (strt.lims$uplimit - fuzz)
if (fit$convergence != 0) {
if (warn) warning(fit$message)
}
if (any(low.bound)) {
ans$convergence <- -1
messL <- paste(paste(strt.lims$names[low.bound], "parameter at lower boundary.")
, collapse = "; ")
ans$fit$message <- messL
if (warn) warning(ans$fit$message)
} else {
messL <- NULL
}
if (any(high.bound)) {
ans$convergence <- -1
messH <- paste(paste(strt.lims$names[high.bound], "parameter at upper boundary.")
, collapse = "; ")
ans$fit$message <- c(messL, messH)
if (warn) warning(ans$fit$message)
}
ans
} # end function