knitr::opts_chunk$set(
  cache.path = "cache/model-fit/",
  fig.path = "figure/model-fit/",
  tidy = FALSE,
  comment="",
  message = FALSE,
  eval=FALSE,
  cache=TRUE)

library(sp)
require(spdep)
library(rstan)

Estimating density and capture probability from electrofishing data

Getting the data ready

In order to model the data we need to convert it to a more convient form and add in covariates.

# subset data to remove Stocking, low Runs and no area.
ef <- subset(CLdata::ef, !is.na(Site_OBJECTID) & !is.na(Area) & Runs > 2)

# restructure ef
ef <- do.call(rbind,
        lapply(c("S0", "SP", "T0", "TP"),
            function(x) {
              out <- ef[c("Site_OBJECTID", "Site.Name", "Dataset", "Date", "Runs", "Area", "Trust", paste0(x, "_R", 1:6))]
              names(out) <- gsub(x, "n", names(out))
              out $ Species <- if (substring(x, 1, 1) == "S") "Salmon" else "Trout"
              out $ LifeStage <- if (substring(x, 2, 2) == "0") "Fry" else "Parr"
              out
            }
        ))

# drop rows with all NAs
ef <- ef[apply(ef[paste0("n_R", 1:6)], 1, function(x) !all(is.na(x))), ]

# tag on HMA data
hma <- CLdata::hma
hma <- hma[!(hma $ HAName %in% c("Shetlands", "Orkneys")),]
hma $ hmidx <- 1:nrow(hma)
gis <- CLdata::gis
gis @ data <- cbind(gis @ data, over(gis, hma))

# work out neighbourhood structure of hma
hmaadj0 <- poly2nb(hma, queen = FALSE)
hmaadj <- unclass(hmaadj0)
attributes(hmaadj) <- NULL
names(hmaadj) <- paste(1:length(hmaadj))
# connect hebrides
hmaadj[[which(hma $ HAName == "Outer Hebrides")]] <- which(hma $ HAName == "Inner Hebrides")
hmaadj[[which(hma $ HAName == "Inner Hebrides")]] <-  c(21, 42, 43)


# tag on gis data
ef <- cbind(ef, gis[ef $ Site_OBJECTID,])
# subset put sites above barriers only for density model
#ef <- subset(ef, !barrier)
# fix missing value
ef $ n_R3[is.na(ef $ n_R3) & ef $ Runs == 3] <- 0
# add on data summaries
ef <- cbind(ef, CLmodel::getData(ef, passnames = paste0("n_R", 1:6)))
# add in some dates
ef $ pDate <- as.POSIXlt(ef $ Date, tz = "GMT", format = "%d/%m/%Y")
# try using lubridate
ef $ year <- ef $ pDate $ year + 1900
ef $ doy <- ef $ pDate $ yday
# add river classification
ef $ RIVCLASS <- as.numeric(sapply(strsplit(paste(ef $ RIVCODE), "/"), "[[", 2))
ef $ HC <- ifelse(ef $ RIVCLASS == 0, 0,
             ifelse(round(ef $ RIVCLASS) == ef $ RIVCLASS, 1, 2 ))


# trim data
ef <- subset(ef, doy > 150 & doy < 325 & year >= 1997 & year <= 2013)

now the data is in a suitable form for modelling

str(ef)

fitting a basic model: the basic likelihood

The likelihood of the site densities $\lambda$ and the capture probabilities $p$ can be written in terms of two statistics $T_i=\sum_{s} n_{is}$, the total number of fish caught in each electrofishing event, $i$, and $Z=$ a quantity which measures the effective number of fishing passes - i.e. whether all fish are caught during the first pass or whether they were spread out accross several fishing passes.

$L(\lambda, \beta; T, Z) = \prod_i (p_i (1-p_i)^{S_i - 1 - Z_i} \lambda_i)^{T_i} \exp(- (1-(1-p_i)^{S_i}) \lambda_i )$

We can model both the $p$ and the $\lambda$ parameters in terms of linear combinations of covariates by transforming the density and capture probability onto the real line:

$\log(\lambda_i) = A_i \alpha$ and $\log \frac{p_i}{1-p_i} = B_i \beta$

It is possible to estimate the $\alpha$ parameters conditional on the design matrix $B$ and then the estimates of $\beta$ can be estimated from a poisson regression. However, to conduct model selection or to calculate confidence intervals the full likelihood must be used.

A most simple model is one where we assume the capture probabilities are everywhere constant.

Model fitting proceeds as follows. First we estimate the capture probability using the optimiser in stan

# do some model selection
summaryMods <- function(lst, m0 = NULL) {
  aics <- sapply(lst, "[[", "aic")
  bics <- sapply(lst, BIC)

  tab <-
   data.frame(
    forms = sapply(lst, function(x) paste(deparse(x$formula, width.cutoff = 500L))),
    aic = aics,
    bic = bics
    )

  if (!is.null(m0)) {
    tab $ Daic <- tab $ aic - AIC(m0)
    tab $ Dbic <- tab $ bic - BIC(m0)
  }
  tab <- tab[order(aics),]

  unique(tab)
}

getModels <- function(vars, n) {
  if (n > length(vars)) stop("n too big for number of variable")
  out <- do.call(expand.grid, lapply(1:n, function(i) 1:length(vars)))
  out <- unique(t(apply(out, 1, sort)))
  out <- out[apply(out, 1, function(x) !any(table(x)>1)),,drop=FALSE]
  if (n > 1) {
    apply(out, 1, function(x) paste(vars[x], collapse = " + "))
  } else {
    vars[out]
  }
}

we use the function efp which stands for electrofishing p to get the maximum likelihood estimates of $p$ given our model for $p$ and a full model for lambda, in this case it is one $p$ for everything.

m0 <- efp(X ~ 1, data = ef, passes = "Runs")
AIC(m0)

For brevity will focus on a few effects. We know from preliminary analysis that space has a strong effect

f1s <- c("Trust",
         "HAName",
         "DESCRIPTIO",
         "factor(CATCH_ID)",
         "poly(Water_W, 1)",
         "poly(Elevation_, 1)",
         "factor(year)",
         "s(year, k=3)",
         "poly(Area, 1)",
         "factor(RIVCLASS)",
         "factor(HC)",
         "Species",
         "LifeStage",
         "Species*LifeStage"
         )
forms <- lapply(getModels(f1s, 1), function(x) as.formula(paste0("X ~ ", x)))
mods <- lapply(forms, efp, data = ef, passes = "Runs")
m0 <- efp(X ~ 1, data = ef, passes = "Runs")
summaryMods(mods, m0 = m0)
m1 <- efp(X ~ factor(CATCH_ID), data = ef, passes = "Runs")

# remove zeros
wk <- subset(ef, T > 0)
wk $ p <- m1 $ fitted
wk <- unique(wk[c("p", "CATCH_ID")])
wk <- wk[order(wk $ CATCH_ID),]
rownames(wk) <- paste(wk $ CATCH_ID)
redctm <- CLdata::redctm
redctm $ p <- wk[paste(redctm $ CATCH_ID),] $ p
spplot(redctm, "p", cuts = 8, col.regions=heat.colors(9))
# the range of capture probabilities
lattice::histogram(~p, data = wk)

Now we will try and add additional effects

f1s <- c("Trust",
         "HAName",
         "DESCRIPTIO",
         "poly(Water_W, 1)",
         "poly(Elevation_, 1)",
         "factor(year)",
         "s(year, k=3)",
         "poly(Area, 1)",
         "Species",
         "LifeStage",
         "Species*LifeStage",
         "poly(Distance_s,1)"
         )


forms <- lapply(getModels(f1s, 1), function(x) as.formula(paste0("X ~ factor(CATCH_ID) + ", x)))
m0 <- efp(X ~ factor(CATCH_ID), data = ef, passes = "Runs")
mods1 <- lapply(forms, efp, data = ef, passes = "Runs")

summaryMods(mods1, m0 = m0)


f1s <- c("Trust",
         "HAName",
         "DESCRIPTIO",
         "poly(Water_W, 1)",
         "poly(Elevation_, 1)",
         "factor(year)",
         "s(year, k=3)",
         "poly(Area, 1)",
         "poly(Distance_s,1)"
         )
forms <- lapply(getModels(f1s, 1), function(x) as.formula(paste0("X ~ factor(CATCH_ID) + Species*LifeStage +", x)))
m0 <- efp(X ~ factor(CATCH_ID) + Species*LifeStage, data = ef, passes = "Runs")
mods2 <- lapply(forms, efp, data = ef, passes = "Runs")

summaryMods(mods2, m0 = m0)


f1s <- c("HAName",
         "DESCRIPTIO",
         "poly(Water_W, 1)",
         "poly(Elevation_, 1)",
         "factor(year)",
         "s(year, k=3)",
         "poly(Area, 1)",
         "poly(Distance_s,1)"
         )
forms <- lapply(getModels(f1s, 1), function(x) as.formula(paste0("X ~ factor(CATCH_ID) + Species*LifeStage + Trust + ", x)))
m0 <- efp(X ~ factor(CATCH_ID) + Species*LifeStage + Trust, data = ef, passes = "Runs")
mods3 <- lapply(forms, efp, data = ef, passes = "Runs")

summaryMods(mods3, m0 = m0)


f1s <- c("poly(Water_W, 1)",
         "poly(Elevation_, 1)",
         "poly(Area, 1)",
         "poly(Distance_s,1)"
         )
forms <- lapply(getModels(f1s, 1), function(x) as.formula(paste0("X ~ factor(CATCH_ID) + Species*LifeStage + Trust + factor(year) + ", x)))
m0 <- efp(X ~ factor(CATCH_ID) + Species*LifeStage + Trust + factor(year), data = ef, passes = "Runs")
mods4 <- lapply(forms, efp, data = ef, passes = "Runs")

summaryMods(mods4, m0 = m0)



f1s <- c("poly(Water_W, 1)",
         "poly(Elevation_, 1)",
         "poly(Distance_s,1)"
         )
forms <- lapply(getModels(f1s, 1), function(x) as.formula(paste0("X ~ factor(CATCH_ID) + Species*LifeStage + Trust + factor(year) + poly(Area, 1) + ", x)))
m0 <- efp(X ~ factor(CATCH_ID) + Species*LifeStage + Trust + factor(year) + poly(Area, 1), data = ef, passes = "Runs")
mods5 <- lapply(forms, efp, data = ef, passes = "Runs")

summaryMods(mods5, m0 = m0)

f1s <- c("poly(Elevation_, 1)",
         "poly(Distance_s,1)",
         "poly(Elevation_, 1) + poly(Distance_s,1)"
         )
forms <- lapply(getModels(f1s, 1), function(x) as.formula(paste0("X ~ factor(CATCH_ID) + Species*LifeStage + Trust + factor(year) + poly(Area, 1) + poly(Water_W,1) + ", x)))
m0 <- efp(X ~ factor(CATCH_ID) + Species*LifeStage + Trust + factor(year) + poly(Area, 1) + poly(Water_W,1), data = ef, passes = "Runs")
mods6 <- lapply(forms, efp, data = ef, passes = "Runs")

summaryMods(mods6, m0 = m0)

# drop out terms one at a time
f1s <- c("factor(CATCH_ID)",
         "Trust",
         "poly(Water_W, 1)",
         "factor(year)",
         "poly(Area, 1)",
         "Species*LifeStage",
         "poly(Distance_s,1)"
         )
m0 <- efp(as.formula(paste0("X ~ ", paste(f1s, collapse = " + "))), data = ef, passes = "Runs", hessian = TRUE)

eH <- eigen(m0 $ hessian)
rownames(eH $ vectors) <- colnames(m0 $ G)
eVs <- zapsmall(eH $ vectors[,which(abs(eH $ values) < 1e-9)])
eVs[rowSums(abs(eVs)) > 0,]
# so it is a confounding between trust and catchment

rowSums(with(ef0, table(CATCH_ID, Trust)) > 0)
TrustFishings <- colSums(with(ef0, table(CATCH_ID, Trust)) > 0)
names(TrustFishings)[TrustFishings == 1]

with(ef0, table(CATCH_ID, Trust)


forms <- lapply(1:length(f1s), function(i) as.formula(paste0("X ~ ", paste(f1s[-i], collapse = " + "))))
modstest <- lapply(forms, efp, data = ef, passes = "Runs")

out <- summaryMods(modstest, m0 = m0)
out $ forms <- f1s[as.numeric(rownames(out))]
out

ef0 <- subset(ef, T > 0)
g0 <- mgcv::gam(G = m0 $ Gsetup)
g0 $ coefficients[] <- m0 $ par
g0 $ Vp[] <- m0 $ Vb
g0 $ family <- binomial()
ef0 $ pfit <- predict(g0, newdata = ef0)

# So there are too many parameters becuase there is some redundancy between trust and catch_id


#mbest <- efp(X ~ factor(CATCH_ID) + factor(RIVCLASS) + Species*LifeStage + factor(year) + poly(Area, 1), data = ef, passes = "Runs")


Faskally/CLmodel documentation built on Sept. 21, 2023, 1:15 p.m.