hmltm: Line transect estimation with hidden Markov availability models

Documented in bsample cv histline inv.logit is.nullmodel lnci.nmin logit make.covb make.onesit make.survey.pars nm2m numericfy truncdat

#                      -----------------------------
#--------------------- Start DLB's utility functions --------------------
#' @title Clone of \code{sample} without weird behaviour for length(x)==1.
#'
#' @usage bsample(x, size, replace = FALSE, prob = NULL)
#' 
#' @description
#'  Identical to \code{sample} but when x is an integer it just returns x rather than an
#'  integer in 1:x (which is what \code{sample} does).
#'  
#' @param x Either a vector of one or more elements from which to choose, or a positive integer. 
#' See \link{sample} for details.
#' @param size a positive number, the number of items to choose from.
#' @param replace Should sampling be with replacement?
#' @param prob A vector of probability weights for obtaining the elements of the vector being sampled.
#' 
bsample=function(x,size,replace=FALSE,prob=NULL) {
  if(length(x)==1) return(x)
  else return(sample(x,size,replace,prob))
}


#' @title Caclulates coefficient of variation.
#'
#' @description
#'  Utility function
#'  
#' @param x Random variable.
cv=function(x) sd(x)/mean(x) # calculates coefficient of variation

#' @title Converts nm to metres.
#'
#' @description
#'  Utility function
#'  
#' @param x Distance in nautical miles.
nm2m=function(x) return(x*1852) # converts nautical miles to metres

#mod=function(x,y) return(x-floor(x/y)*y) # returns the integer part of x/y


#' @title Draws histogram.
#'
#' @description
#'  Utility function to draw histograms with more options than \code{hist} allows.
#'  
#' @param height Height of histogram bars.
#' @param breaks Locations of boundaries of histogram bins (must be 1 longer than \code{height}).
#' @param lineonly If TRUE, uses \code{\link{lines}} to draw lines on current plot; else uses 
#' \code{\link{plot}} to draw lines on new plot.
#' @param outline If TRUE, draws only the outline (profile) of the histogram; else draws each 
#' complete bar.
#' @param fill If TRUE, uses polygon() to fill barsl in this case valid arguments to polygon() 
#' are passed via argument(s) "...". If fill==FALSE, valid arguments to plot() or lines() are 
#' passed via argument(s) "..."
#' @param ylim Range of y-axis.
#' @param xlab Label for x-axis.
#' @param ylab Label for y-axis.
#' @param ... See aargument \code{fill}.
histline=function(height,breaks,lineonly=FALSE,outline=FALSE,fill=FALSE,ylim=range(height),
                  xlab="x",ylab="y",...)
{
  n=length(height)
  if(length(breaks)!=(n+1)) stop("breaks must be 1 longer than height")
  if(outline) {
    y=c(0,rep(height,times=rep(2,n)),0)
    x=rep(breaks,times=rep(2,(n+1)))
  }   else {
    y=rep(0,4*n)
    x=rep(0,4*n+2)
    for(i in 1:n) {
      y[((i-1)*4+1):(i*4)]=c(0,rep(height[i],2),0)
      x[((i-1)*4+1):(i*4)]=c(rep(breaks[i],2),rep(breaks[i+1],2))
    }
    x=x[1:(4*n)]
  }
  if(lineonly) {
    if(!fill) lines(x,y,...)
    else polygon(x,y,...)
  } else {
    if(!fill) plot(x,y,type="l",ylim=ylim,xlab=xlab,ylab=ylab,...)
    else {
      plot(x,y,type="n",ylim=ylim,xlab=xlab,ylab=ylab)
      polygon(x,y,...)
    }
  }
}


#' @title Integration using Simpson's method.
#'
#' @description
#'  Modified version of function sintegral from library Bolstad.
#'  
#' @param fx Values at \code{x} (see below) of the function to be integrated.
#' @param x Values at which function fx has been evaluated.
#' @param n.pts The number of points to be used in integration. If \code{x} contains more than
#'\code{n.pts} then \code{n.pts} will be set to \code{length(x)}.
#' @param type if equal to `\code{cdf}' a list comprising x-values and cdf values at each x-value 
#' is returned, else the integral over the range of \code{x} is returned.
sintegral=function (fx, x, n.pts=16, type="int") 
{
  #  if (class(fx) == "function") 
  #    fx = fx(x)
  #  n.x = length(x)
  #  if (n.x != length(fx)) 
  #    stop("Unequal input vector lengths")
  #  if (n.pts < 64) 
  #    n.pts = 64
  ap = approx(x, fx, n = 2 * n.pts + 1)
  h = diff(ap$x)[1]
  integral = h*(ap$y[2*(1:n.pts)-1]+4*ap$y[2*(1:n.pts)]+ap$y[2*(1:n.pts)+1])/3
  if(type!="cdf") return(sum(integral)) 
  else return(list(x=ap$x[2*(1:n.pts)],y=cumsum(integral)))
}



#' @title Logit function.
#'
#' @description
#'  Logit function
#'  
#' @param p probability (scalar or vector).
logit=function(p) return(log(p/(1-p)))  # returns logit of p

#' @title Inverse logit function.
#'
#' @description
#'  Inverse logit function
#'  
#' @param x scalar or .
inv.logit=function(x) return(1/(1+exp(-x))) # returns p from x=(logit of p)

#' @title Lognormal confidence interval with lower bound
#'
#' @usage lnci.nmin(n,Nhat,cv)
#' 
#' @description
#'  Calculates 95\% confidence interval for abundance estimate \code{Nhat}, based on the assumption that 
#'  \code{Nhat} is lognormally distributed and that \code{n} population members were observed (so that 
#'  lower bound of confidence interval can't be below \code{n}.
#'  
#' @param n observed number of population members.
#' @param Nhat point estimate of abundance.
#' @param cv coefficient of variation of \code{Nhat}.
#' 
#' @export
lnci.nmin=function(n,Nhat,cv){
  varNhat=(Nhat*cv)^2
  cfactor=exp(1.96*sqrt(log(1+varNhat/(Nhat-n)^2)))
  lower=n+(Nhat-n)/cfactor
  upper=n+(Nhat-n)*cfactor
  return(list(lower=lower,upper=upper))
}

#' @title Data truncation.
#'
#' @description
#' Left- and/or right-truncates any variable in data frame dat, inserting effort-only record if 
#' truncation removes all detections on a transect.
#' Subtracts left-trunction point off all variable values - after all truncation.

#' 
#' @param dat distance data frame. Must have columns for stratum stratum.area, transect, transect.length, 
#' <any-observer-level_variable>, and if \code{twosit}==TRUE then object as well. These can 
#' have any names, but the names must be specified, via argument \code{colnames}.
#' @param minvalue left-truncation point.
#' @param maxvalue right-truncation point.
#' @param twosit If TRUE, assumes this is an mrds-type dataset with two lines per detection.
#' @param colnames name of columns containing stratum, stratum area, transect, transect length, 
#' <any-observer-level_variable>, and if \code{twosit}==TRUE then object as well, IN THIS ORDER, 
#' in a character vector. The default value is 
#' colnames=c("stratum","area","transect","L","x","obs").
truncdat=function(dat,minval=0,maxval=NULL,twosit=FALSE,colnames=c("stratum","area","transect","L","x","obs")){
  tdat=dat
  keepcols=rep(NA,4)
  for(i in 1:4) {
    keepcols[i]=which(names(dat)==colnames[i])
    if(is.null(keepcols[i])) stop("No column in dat called ",colnames[i])
  }
  NAs=dat[1,,drop=FALSE]
#  NAs[,-keepcols]=NA
  NAs[,c("object","x","y")]=NA
  xcol=which(names(dat)==colnames[5])
  if(is.null(xcol)) stop("No column in dat called ",colnames[5])
  if(is.null(maxval)) maxval=max(na.omit(dat[,xcol]))
  if(twosit){
    if(length(colnames)<6) stop("With double-observer data, need 6th colname for observer; only 5 colnames:",colnames)
    obscol=which(names(dat)==colnames[6])
    if(is.null(obscol)) stop("No column in dat called ",colnames[6])
    out1=which(dat[,obscol]==1 & (dat[,xcol]<minval | dat[,xcol]>maxval))
    out2=which(dat[,obscol]==2 & (dat[,xcol]<minval | dat[,xcol]>maxval))
    if(length(out1) != length(out2)) stop("Different number of obs1 and obs2 detections for left-truncation")
    if(unique(out2-out1) != 1) stop("Looks like non-consecutive obs1 and obs2 detections for left-truncation")
    nout=length(out1)
    
    if(nout>0) {
      svalues=tvalues=rep(NA,nout)
      if(is.factor(dat[,keepcols[1]])) {
        svalues=rep(levels(dat[,keepcols[1]])[1],nout)
        levels(svalues)=levels(dat[,keepcols[1]])
        NAs[keepcols[1]]=svalues[1] # put a factor in the NA row (arbitrary level, as it will be overwritten)
        levels(NAs)=levels(svalues)
      }
      if(is.factor(dat[,keepcols[3]])) {
        tvalues=rep(levels(dat[,keepcols[3]])[3],nout)
        levels(tvalues)=levels(dat[,keepcols[3]])
        NAs[keepcols[3]]=tvalues[1] # put a factor in the NA row (arbitrary level, as it will be overwritten)
        levels(NAs)=levels(tvalues)
      }
      outeff=data.frame(stratum=svalues,area=rep(NA,nout),transect=tvalues,L=rep(NA,nout))
      if(is.factor(outeff$stratum)) levels(outeff$stratum)=levels(svalues)
      if(is.factor(outeff$transect)) levels(outeff$stratum)=levels(tvalues)
      for(i in 1:nout) outeff[i,]=dat[out1[i],keepcols] # store effort info for truncated sightings
      tdat=dat[-c(out1,out2),] # remove all truncated sightings
      for(i in 1:nout) {
        got=which(tdat[,keepcols[1]]==outeff$stratum[i] & tdat[,keepcols[2]]==outeff$area[i] & 
                    tdat[,keepcols[3]]==outeff$transect[i] & tdat[,keepcols[4]]==outeff$L[i])
        if(length(got)==0) { # transect no longer in truncated dataset
          tdat=rbind(tdat,NAs) # add row of NAs
          tdat[dim(tdat)[1],keepcols]=outeff[i,] # add in missing transect info
        }
      }  
    }
  } else {
    out=which(dat[,xcol]<minval | dat[,xcol]>maxval)
    nout=length(out)
    
    if(nout>0) {
      svalues=tvalues=rep(NA,nout)
      if(is.factor(dat[,keepcols[1]])) {
        svalues=rep(levels(dat[,keepcols[1]])[1],nout)
        levels(svalues)=levels(dat[,keepcols[1]])
        NAs[keepcols[1]]=svalues[1] # put a factor in the NA row (arbitrary level, as it will be overwritten)
        levels(NAs)=levels(svalues)
      }
      if(is.factor(dat[,keepcols[3]])) {
        tvalues=rep(levels(dat[,keepcols[3]])[3],nout)
        levels(tvalues)=levels(dat[,keepcols[3]])
        NAs[keepcols[3]]=tvalues[1] # put a factor in the NA row (arbitrary level, as it will be overwritten)
        levels(NAs)=levels(tvalues)
      }
      outeff=data.frame(stratum=svalues,area=rep(NA,nout),transect=tvalues,L=rep(NA,nout))
      if(is.factor(outeff$stratum)) levels(outeff$stratum)=levels(svalues)
      if(is.factor(outeff$transect)) levels(outeff$stratum)=levels(tvalues)
      #    for(i in 1:nout) outeff[i,]=dat[out1[i],keepcols] # store effort info for truncated sightings
      for(i in 1:nout) outeff[i,]=dat[out[i],keepcols] # store effort info for truncated sightings
      tdat=dat[-out,] # remove all truncated sightings
      for(i in 1:nout) {
        got=which(tdat[,keepcols[1]]==outeff$stratum[i] & tdat[,keepcols[2]]==outeff$area[i] & 
                    tdat[,keepcols[3]]==outeff$transect[i] & tdat[,keepcols[4]]==outeff$L[i])
        if(length(got)==0) { # transect no longer in truncated dataset
          tdat=rbind(tdat,NAs) # add row of NAs
          tdat[dim(tdat)[1],keepcols]=outeff[i,] # add in missing transect info
        }
      }
    }
  }
  
  if(nout>0) {
    tdat=tdat[order(tdat[keepcols[1]],tdat[keepcols[3]]),]
    tdat[!is.na(tdat[,xcol]),xcol]=tdat[!is.na(tdat[,xcol]),xcol]-minval # shift all left
  }
  return(tdat)
}


#' @title Reduces MRDS data frame to CDS data frame.
#'
#' @param dat distance data frame in mrds form. Must have columns 'object' (unique detection 
#' identifier), 'seen' (binary indicating detecteb by observer or not) and 'y' (forward) 
#' detection distance.
#' @param prefer which of the two observers' data to prefer when forward distances are 
#'   missing/equal must be 1 or 2.
#'   
#' @description
#' Reduces mark-recapture Distance sampling (MRDS) data frame dat, with two lines per detection, to a 
#' conventional distance sampling (CDS) data frame with a single line per detection. In the case of 
#' duplicates ("recaptures"), takes the information from the detection made farthest ahead (i.e. that
#' with larger \code{y}) With duplicates that have the same \code{y} or neither of which have a 
#' \code{y}, it chooses according to the parameter \code{prefer}. If only one of the duplicates 
#' has a \code{y}, it chooses that one.
#' 
#' @param dat MRDS data frame.
make.onesit=function(dat,prefer=1) {
  if(prefer!=1 & prefer!=2) stop("Argument 'prefer' must be 1 or 2.")
  n=dim(dat)[1]
  out=rep(FALSE,n)
  i=1
  while(i<=n){
    if(!is.na(dat$object[i])){
      if(dat$seen[i]==0) out[i]=TRUE
      if(dat$seen[i+1]==0) out[i+1]=TRUE
      if(dat$seen[i]==1 & dat$seen[i+1]==1) {
        if(is.na(dat$y[i]) & is.na(dat$y[i+1])) { # no y's; remove not preferred detection
          out[i+(3-prefer)-1]=TRUE
        } else if(is.na(dat$y[i]) & !is.na(dat$y[i+1])) { # keep only non-NA y
          out[i]=TRUE
        } else if(!is.na(dat$y[i]) & is.na(dat$y[i+1])){ # keep only non-NA y
          out[i+1]=TRUE
        } else if(dat$y[i]==dat$y[i+1]){# remove not preferred detection
          out[i+(3-prefer)-1]=TRUE
        } else if(dat$y[i]<dat$y[i+1]){ # remove later (closer) detection
          out[i]=TRUE
        } else {
          out[i+1]=TRUE
        }
      }
      i=i+2
    } else i=i+1
  }
  dat=dat[!out,] # remove worse observer's lines
  return(dat)
}



#' @title Makes survey parameter list (\code{survey.pars}) suitable for passing to 
#' \code{\link{est.hmltm}}.
#'
#' @description
#' \code{make.survey.pars} just puts a bunch of stuff in a list in a format that \code{\link{est.hmltm}}
#'  expects.
#'
#' @param spd is observer speed.
#' @param W is perpendicular right-truncation distance for estimation.
#' @param ymax is maximum forward distance for estimation - it should be far enough ahead that it is
#' not possible to detect anyting beyond \code{ymax}.
#' @param Wl is perpendicular left-truncation distance for estimation. (After truncating \code{Wl} is 
#' subtracted from all perpendicular distances.)
#' @param dT is the time step on which the availability hidden Markov model operates.
#' @param theta.f is REDUNDANT and should be removed. It must equal 0. 
#' @param theta.b is REDUNDANT and should be removed. It must equal 90. 
#'
#' @details Packs the above in a list suitable for passing as \code{survey.pars} to 
#' \code{\link{est.hmltm}}.
make.survey.pars=function(spd,W,ymax,Wl=0,dT=1,theta.f=0,theta.b=90){
  return(list(spd=spd,W=W,ymax=ymax,Wl=Wl,dT=dT,dy=spd*dT,theta.f=theta.f,theta.b=theta.b))
}

#' @title Decides if model is a null model.
#'
#' @description
#'  Logical function: true if \code{models} includes no covariates
#'  
#' @param models list of characters with elements \code{$y} and \code{$x} specifying y- and 
#' x-covariate models. Either \code{NULL} or regression model format (without response on left).
is.nullmodel=function(models){
  null=TRUE
  for(i in 1:length(models)) null=null & is.null(models[[i]])
  return(null)
}


#' @title Constructs linear predictor.
#'
#' @description
#' Returns parameter vector on linear predictor scale for hazard function \code{FUN}, using 
#' \code{model} and data frame \code{dat}. Makes a matrix with each row the parameter values for an 
#' observation, then concatenates these into a single vector (so can pass as vector to C++ code).
#'  
#' @param b parameter vector.
#' @param FUN detection hazard function name (character).
#' @param models list with two components (\code{$y} and \code{$x}) specifying models for detection 
#' hazard function scale parameters in forward (y) and perpendicular (x) dimensions. If detection 
#' hazard function form does not have separate scale parameters in y- and x- dimensions, the model
#' given in \code{$y} is take as the scale parameter model. \code{$y} and \code{$x} must be either 
#' \code{NULL} or "~<regspec>", where <regspec> is a regression model spefication (e.g. 
#' \code{height+weight} or \code{height:weight} or \code{height*weight}, etc.).
#' @param dat data frame, which must have columns corresponding to variable names in \code{$y} and
#' \code{$x}.
make.covb=function(b,FUN,models,dat)
{
  nfixed=switch(FUN,
                h.IP.0 = 2,
                h.EP1.0 = 2,
                h.EP2.0 = 3,
                h.EP1x.0 = 3,
                h.EP2x.0 = 4,
                0
  )
  if(nfixed==0) stop("Hazard model ",FUN," is not progremmed (yet).")
  if(is.nullmodel(models)) {
    if(length(b) != nfixed) stop("length of b inconsistent with model")
    n=dim(dat)[1]
    #    covb=matrix(c(rep(b,rep(n,nfixed))),ncol=nfixed)
    covb=rep(b,n)
  } else {
    if(FUN=="h.EP2.0" | FUN=="h.EP1.0" | FUN=="h.IP.0"){
      X=model.matrix(as.formula(models$y),data=dat)
      n=dim(X)[1]
      nb=dim(X)[2]
      nfixed=nfixed-1
      if(length(b) != (nfixed+nb)) stop("length of b inconsistent with model")
      covb=matrix(c(rep(b[1:nfixed],rep(n,nfixed)),X%*%b[nfixed+(1:nb)]),ncol=(nfixed+1))
      covb=as.vector(t(covb))
    }
    else if(FUN=="h.EP2x.0" | FUN=="h.EP1x.0"){
      if(!is.null(models$y)){
        X=model.matrix(as.formula(models$y),data=dat)
        n=dim(X)[1]
        nb=dim(X)[2]        
        if(!is.null(models$x)){ # here if have covariates for x and y
          X.x=model.matrix(as.formula(models$x),data=dat)
          nb.x=dim(X.x)[2]
          nfixed=nfixed-2
          if(length(b) != (nfixed+nb+nb.x)) stop("length of b inconsistent with model and model.x")
          covb=matrix(c(rep(b[1:nfixed],rep(n,nfixed)),X%*%b[nfixed+(1:nb)],X.x%*%b[nfixed+nb+(1:nb.x)]),ncol=(nfixed+2))  
          covb=as.vector(t(covb))
        } else { # here if have covariates for y only
          nfixed=nfixed-1
          if(length(b) != (nfixed+nb)) stop("length of b inconsistent with model and model.x")
          #      covb=matrix(c(rep(b[1:nfixed],rep(n,nfixed)),X%*%b[nfixed+(1:nb)]),ncol=(nfixed+1))   #### bitchange
          covb=matrix(c(rep(b[1:(nfixed-1)],rep(n,(nfixed-1))),X%*%b[(nfixed-1)+(1:nb)],rep(b[length(b)],n)),ncol=(nfixed+1))  #### bitchange 
          covb=as.vector(t(covb))
        }
      } else { # here if have covariates for x only
        X.x=model.matrix(as.formula(models$x),data=dat)
        n.x=dim(X.x)[1]
        nb.x=dim(X.x)[2]
        nfixed=nfixed-1
        if(length(b) != (nfixed+nb.x)) stop("length of b inconsistent with model and model.x")
        covb=matrix(c(rep(b[1:nfixed],rep(n.x,nfixed)),X.x%*%b[nfixed+(1:nb.x)]),ncol=(nfixed+1))  
        covb=as.vector(t(covb))        
      }
    } else {
      stop("Invalid FUN.")
    }
  }
  return(covb)
}


#' @title Converts factor or character column of data frame to integer.
#'
#' @description
#' Replaces a specified column, which may contain factor or character variables, with
#' uniquely-numbered integer values. Returns the new data frame and a data frame 
#' showing which factor corresponds to which integer, in a list.
#' 
#' @param dat A data frame
#' @param column The column number or name of the column to be converted.
#' @param add If TRUE a new column is added, named `num<column>', where '<column>'
#' is the value of the \code{column} argument, else the old column is overwritten.
#' 
#' @export
numericfy = function(dat,column=1,add=FALSE){
  vals = dat[,column]
  colnames = names(dat)
  ncols = length(colnames)
  if(is.character(column)) colnum = which(colnames==column)
  else colnum = column
  if(is.numeric(vals)) stop("This column is already numeric. No action taken.")
  if(is.factor(vals)) vals = as.character(vals)
  uvals = unique(vals) # unique values
  nvals = length(uvals) # number of unique values
  ivals = rep(NA,nvals) # new data column (numeric)
  for(i in 1:length(vals)) ivals[i] = which(uvals==vals[i])
  if(add) {
    newdat = cbind(dat,ivals)
    names(newdat)[length(names(newdat))] = paste("num",column,sep="")
  } else {
    if(colnum==1) newdat = cbind(ivals,dat[,(colnum+1):ncols])
    else if(colnum==ncols) newdat = cbind(dat[,1:(colnum-1)],ivals)
    else newdat = cbind(dat[,1:(colnum-1)],ivals,dat[,(colnum+1):ncols])
    names(newdat) = colnames
  }
  lookup = data.frame(new=1:nvals, old=uvals)
  return(list(dat=newdat,lookup=lookup))
}
david-borchers/hmltm documentation built on Oct. 29, 2023, 9:07 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
david-borchers/hmltm
Line transect estimation with hidden Markov availability models

R/utils.R
In david-borchers/hmltm: Line transect estimation with hidden Markov availability models

Defines functions numericfy make.covb is.nullmodel make.survey.pars make.onesit truncdat lnci.nmin inv.logit logit histline nm2m cv bsample

Documented in bsample cv histline inv.logit is.nullmodel lnci.nmin logit make.covb make.onesit make.survey.pars nm2m numericfy truncdat

R Package Documentation

Browse R Packages

We want your feedback!

david-borchers/hmltm Line transect estimation with hidden Markov availability models

R/utils.R In david-borchers/hmltm: Line transect estimation with hidden Markov availability models

Defines functions numericfy make.covb is.nullmodel make.survey.pars make.onesit truncdat lnci.nmin inv.logit logit histline nm2m cv bsample

Documented in bsample cv histline inv.logit is.nullmodel lnci.nmin logit make.covb make.onesit make.survey.pars nm2m numericfy truncdat

R Package Documentation

Browse R Packages

We want your feedback!

david-borchers/hmltm
Line transect estimation with hidden Markov availability models

R/utils.R
In david-borchers/hmltm: Line transect estimation with hidden Markov availability models