# R/integration.constant.R In Rdistance: Distance-Sampling Analyses for Density and Abundance Estimation

#### Documented in integration.constant

#' @title Compute the integration constant for distance density functions
#'
#' @description Using numerical integration, this function computes
#' the area under a distance function between two limits (\code{w.lo}
#' and \code{w.hi}).
#'
#' @param dist Vector of detection distance values.
#'
#' @param density A likelihood function for which the
#' integration constant is sought. This function
#' must be capable of evaluating values between \code{w.lo}
#' and \code{w.hi} and have the following parameters:
#'     \itemize{
#'       \item \samp{a} = Parameter vector.
#'       \item \samp{dist} = Vector of distances.
#'       \item \samp{covars} = If the density allows covariates,
#'       the covariate matrix.
#'       \item \samp{w.lo} = Lower limit or left truncation value.
#'       \item \samp{w.hi} = Upper limit or right truncation value.
#'       \item \samp{series} = Form of the series expansions, if any.
#'       \item \samp{expansions} = Number of expansion terms.
#'       \item \samp{scale} = Whether to scale function to integrate to 1.
#'     }
#'
#' @param w.lo The lower limit of integration, or the left truncation
#' value for perpendicular distances.
#'
#' @param w.hi The upper limit of integration, or the right truncation
#' value for perpendicular distances.
#'
#' @param covars Matrix of covariate values.
#'
#' @param a Vector of parameters to pass to \code{density}.
#'
#' @param series The series to use for expansions.
#' If \code{expansions} > 0, this string
#' specifies the type of expansion. Valid values at
#' present are 'simple', 'hermite', and 'cosine'.
#'
#' @param expansions Number of expansions in \code{density}.
#'
#' @param pointSurvey Boolean. TRUE if point transect data,
#' FALSE if line transect data.
#'
#'
#' @details The trapezoid rule is used to numerically integrate
#' \code{density} from \code{w.lo} to \code{w.hi}. Two-hundred
#' (200) equal-sized trapezoids are used in the integration.  The number
#' of trapezoids to use is fixed and cannot be changed without
#' re-writing this routine.
#'
#' @return A scalar (or vector of scalars if covariates are present)
#' that is the area under \code{density} between \code{w.lo} and \code{w.hi}.
#' This scalar can be used as a divisor to scale density such that
#' it integrates to 1.0. If x = density(\ldots), then
#' x / \code{integration.constant(density, \ldots)} will integrate to 1.0.
#'
#'
#' @examples
#' # Can put any number for first argument (1 used here)
#' scl <- integration.constant(dist=units::set_units(1,"m")
#'                           , density=logistic.like
#'                           , covars = NULL
#'                           , pointSurvey = FALSE
#'                           , w.lo = units::set_units(0,"m")
#'                           , w.hi = units::set_units(100,"m")
#'                           , expansions = 0
#'                           , a=c(75,25))
#' print(scl) # Should be 75.1
#'
#' x <- units::set_units(seq(0,100,length=200), "m")
#' y <- logistic.like( c(75,25), x, scale=FALSE ) / scl
#' int.y <- (x[2]-x[1]) * sum(y[-length(y)]+y[-1]) / 2  # the trapezoid rule, should be 1.0
#' print(int.y) # Should be 1
#'
#' @keywords models
#' @importFrom stats integrate
#' @export

integration.constant <- function(dist,
density,
a,
covars,
w.lo,
w.hi,
series,
expansions,
pointSurvey){

density = match.fun(density)

# We need w.lo, w.hi, and dist to have same units.
# This is important because we occasionally drop units in integral calculations below.
# I cannot think of a case where units(w.lo) != units(dist),
# but just in case...
if( units(w.lo) != units(dist)){
w.lo <- units::set_units(w.lo, units(dist), mode = "standard")
}
if( units(w.hi) != units(dist)){
w.hi <- units::set_units(w.hi, units(dist), mode = "standard")
}

# Now, we can safely compute sequence of x values for numerical integration.
# This is done below, in each case where its needed.

nTrapazoids <- 200 # number of evaluation points in numerical integration

if(!is.null(covars)){
# Not sure following is best to do.
# It is much faster to de-dup and compute values on just
# unique combinations of covariates IF there are large number
# of duplicated covariates.  This is common when using
# factors.  But, if using continuous covariates, this is
# inefficient.  Regardless, this code only computes values
# of the scaling constant for unique combinations of X,
# then merges them into the covar array.  The non-de-dupped
# way to do these calculations is something like
# x <- covars %*% matrix(a,ncol=1), which might be just as fast.

Pkey <- tapply(1:nrow(covars), as.data.frame(covars)) # groups of duplicates
covars <- data.frame(covars, zzzPkey=Pkey)
dupCovars <- duplicated(covars$zzzPkey) unique.covars <- covars[!dupCovars,] PkeyCol <- ncol(unique.covars) # Remember that unique.covars now has extra column, zzzPkey # don't include this column in calculations below (or set a[last]=0) # covars and unique.covars now have Pkey, which we will use to # merge later seqy <- list() temp.scaler <- vector(length = nrow(unique.covars)) scaler <- vector(length = nrow(covars), "numeric") if(pointSurvey){ # This case is POINTS, COVARS, all Likelihoods seqx = seq(w.lo, w.hi, length=nTrapazoids) for(i in 1:nrow(unique.covars)){ temp.covars <- matrix(as.numeric(unique.covars[i,-PkeyCol]) , nrow = length(seqx) , ncol = ncol(unique.covars)-1 , byrow=TRUE) seqy[[i]] <- units::drop_units(seqx) * density(a = a , dist = seqx , covars = temp.covars , scale = FALSE , w.lo = w.lo , w.hi = w.hi , expansions = expansions , series=series ) temp.scaler[i] <- units::drop_units(seqx[2] - seqx[1]) * sum(seqy[[i]][-length(seqy[[i]])] + seqy[[i]][-1]) / 2 } } else if(identical(density, halfnorm.like) & expansions == 0){ # this case is LINES, COVARS, HALFNORM, NO EXPANSIONS # Made this a case because I think it's faster, because we know integral s <- as.matrix(unique.covars) %*% matrix(c(a,0),ncol=1) sigma <- exp(s) # link function here # temp.scaler should be integral under distance function # We happen to know it for halfnorm (and some others below) # Integrals are by defn unit-less; but, pnorm returns units. Drop apriori. # We evaluate normal with mean w.lo, sd = sigma, from -Inf to w.hi, then # subtract 0.5 from result for the area to left of mean (w.lo) temp.scaler <- (pnorm(units::drop_units(w.hi) , units::drop_units(w.lo) , sigma) - 0.5) * sqrt(2*pi) * sigma } else if(identical(density, hazrate.like) & expansions == 0){ # This case is LINES, HAZRATE, COVARS, NO EXPANSIONS # # Integral of hazrate involves incomplete gamma functions. # See wolfram. Incomplete gammas are implemented in some packages, e.g., # expint. You could convert to an exact integral using one of these # packages. But, for now, numerically integrate. seqx = seq(w.lo, w.hi, length=nTrapazoids) beta <- a[-length(a)] K <- a[length(a)] s <- as.matrix(unique.covars[,-PkeyCol]) %*% matrix(beta,ncol=1) sigma <- exp(s) # link function here temp.scaler <- sapply(sigma , FUN = function(s, Seqx, KK){ seqy <- 1 - exp(-(Seqx/s)^(-KK)) scaler <- (Seqx[2] - Seqx[1])*sum(seqy[-length(seqy)] + seqy[-1]) / 2 scaler } , Seqx = units::drop_units(seqx) , KK = K) } else if(identical(density, negexp.like) & expansions == 0){ # This case is LINES, NEGEXP, COVARS, NO EXPANSIONS s <- as.matrix(unique.covars) %*% matrix(c(a,0),ncol=1) beta <- exp(s) temp.scaler <- unname((exp(-beta * units::drop_units(w.lo)) - exp(-beta * units::drop_units(w.hi)))/beta) } else { # For case is for LINES, COVARS, LIKE in {Logistic, User, Gamma} # and ALL LIKELIHOODS with expansions > 0 # # We could do all likelihoods this way (i.e., numerical integration); but, # the above special cases are faster (I think) and more accurate in some cases because # we know the theoretical integral (i.e., for normal and exponential) seqx = seq(w.lo, w.hi, length=nTrapazoids) # function to apply density to each row of covariates likeApply <- function(covs , Seqx , W.lo , W.hi , A , Expansions , Series ){ # Matrix of constant covariates for this case temp.covars <- matrix(covs , nrow = length(Seqx) , ncol = length(covs) , byrow = TRUE ) seqy <- density(dist = Seqx , covars = temp.covars , scale = FALSE , w.lo = W.lo , w.hi = W.hi , a = A , expansions = Expansions , series = Series ) scaler <- units::drop_units(Seqx[2] - Seqx[1])*sum(seqy[-length(seqy)] + seqy[-1]) / 2 scaler } temp.scaler <- apply(X = unique.covars[, -PkeyCol, drop = FALSE] , MARGIN = 1 , FUN = likeApply , Seqx = seqx , W.lo = w.lo , W.hi = w.hi , A = a , Expansions = expansions , Series = series ) } df <- data.frame(unique.covars, temp.scaler) z <- merge(covars, df, by.x="zzzPkey", by.y="zzzPkey", sort=F) scaler <- z$temp.scaler
if(pointSurvey){
scaler <- scaler/units::drop_units(dist)
}
} else if( pointSurvey ){
# This case is POINTS - NO Covariates
seqx = seq(w.lo, w.hi, length=nTrapazoids)
seqy <- units::drop_units(seqx) * density( dist = seqx, scale = FALSE,
w.lo = w.lo, w.hi = w.hi, a = a,
expansions = expansions, series=series)

#   trapezoid rule
scaler <- units::drop_units(seqx[2]-seqx[1]) * sum(seqy[-length(seqy)]+seqy[-1]) / (2*units::drop_units(dist))
} else {
# This case is LINES - NO Covariates
# Density should return unit-less numbers (height of density function)
seqx = seq(w.lo, w.hi, length=nTrapazoids)
seqy <- density( dist = seqx
, scale = FALSE
, w.lo = w.lo
, w.hi = w.hi
, a = a
, expansions = expansions
, series=series
)

#   trapezoid rule
scaler <- units::drop_units(seqx[2]-seqx[1]) * sum(seqy[-length(seqy)]+seqy[-1]) / 2
}

# there are cases where the guess at parameters is so bad, that the integration
# constant is 0 (consider pnorm(100,0,2e30)). But, we don't want to return 0
# because it goes in denominator of likelihood and results in Inf, which is
# not informative.  nlminb guesses NaN after that sometimes. We want to return
# the smallest possible number that does not result in log(x) = -Inf.
# Because of the negative applied in nLL function we actually mant to return
# the largest possible numbers such that when we sum them and others we don't get Inf

if( any(indZeros <- is.na(scaler) |
is.infinite(scaler) |
is.nan(scaler) |
(scaler <= .Machine$double.xmin)) ){ scaler[ indZeros ] <- .Machine$double.xmax / sum(indZeros)
}

# cat(paste("\tscaler = \n\t", paste(scaler, collapse = ", "), "\n"))

scaler
}


## Try the Rdistance package in your browser

Any scripts or data that you put into this service are public.

Rdistance documentation built on July 9, 2023, 6:46 p.m.