knitr::opts_chunk$set( cache.path = "cache/model-fit/", fig.path = "figure/model-fit/", tidy = FALSE, comment="", message = FALSE, eval=FALSE, cache=TRUE) library(sp) require(spdep) library(rstan)
In order to model the data we need to convert it to a more convient form and add in covariates.
# subset data to remove Stocking, low Runs and no area. ef <- subset(CLdata::ef, !is.na(Site_OBJECTID) & !is.na(Area) & Runs > 2) # restructure ef ef <- do.call(rbind, lapply(c("S0", "SP", "T0", "TP"), function(x) { out <- ef[c("Site_OBJECTID", "Site.Name", "Dataset", "Date", "Runs", "Area", "Trust", paste0(x, "_R", 1:6))] names(out) <- gsub(x, "n", names(out)) out $ Species <- if (substring(x, 1, 1) == "S") "Salmon" else "Trout" out $ LifeStage <- if (substring(x, 2, 2) == "0") "Fry" else "Parr" out } )) # drop rows with all NAs ef <- ef[apply(ef[paste0("n_R", 1:6)], 1, function(x) !all(is.na(x))), ] # tag on HMA data hma <- CLdata::hma hma <- hma[!(hma $ HAName %in% c("Shetlands", "Orkneys")),] hma $ hmidx <- 1:nrow(hma) gis <- CLdata::gis gis @ data <- cbind(gis @ data, over(gis, hma)) # work out neighbourhood structure of hma hmaadj0 <- poly2nb(hma, queen = FALSE) hmaadj <- unclass(hmaadj0) attributes(hmaadj) <- NULL names(hmaadj) <- paste(1:length(hmaadj)) # connect hebrides hmaadj[[which(hma $ HAName == "Outer Hebrides")]] <- which(hma $ HAName == "Inner Hebrides") hmaadj[[which(hma $ HAName == "Inner Hebrides")]] <- c(21, 42, 43) # tag on gis data ef <- cbind(ef, gis[ef $ Site_OBJECTID,]) # subset put sites above barriers only for density model #ef <- subset(ef, !barrier) # fix missing value ef $ n_R3[is.na(ef $ n_R3) & ef $ Runs == 3] <- 0 # add on data summaries ef <- cbind(ef, CLmodel::getData(ef, passnames = paste0("n_R", 1:6))) # add in some dates ef $ pDate <- as.POSIXlt(ef $ Date, tz = "GMT", format = "%d/%m/%Y") # try using lubridate ef $ year <- ef $ pDate $ year + 1900 ef $ doy <- ef $ pDate $ yday # add river classification ef $ RIVCLASS <- as.numeric(sapply(strsplit(paste(ef $ RIVCODE), "/"), "[[", 2)) ef $ HC <- ifelse(ef $ RIVCLASS == 0, 0, ifelse(round(ef $ RIVCLASS) == ef $ RIVCLASS, 1, 2 )) # trim data ef <- subset(ef, doy > 150 & doy < 325 & year >= 1997 & year <= 2013)
now the data is in a suitable form for modelling
str(ef)
The likelihood of the site densities $\lambda$ and the capture probabilities $p$ can be written in terms of two statistics $T_i=\sum_{s} n_{is}$, the total number of fish caught in each electrofishing event, $i$, and $Z=$ a quantity which measures the effective number of fishing passes - i.e. whether all fish are caught during the first pass or whether they were spread out accross several fishing passes.
$L(\lambda, \beta; T, Z) = \prod_i (p_i (1-p_i)^{S_i - 1 - Z_i} \lambda_i)^{T_i} \exp(- (1-(1-p_i)^{S_i}) \lambda_i )$
We can model both the $p$ and the $\lambda$ parameters in terms of linear combinations of covariates by transforming the density and capture probability onto the real line:
$\log(\lambda_i) = A_i \alpha$ and $\log \frac{p_i}{1-p_i} = B_i \beta$
It is possible to estimate the $\alpha$ parameters conditional on the design matrix $B$ and then the estimates of $\beta$ can be estimated from a poisson regression. However, to conduct model selection or to calculate confidence intervals the full likelihood must be used.
A most simple model is one where we assume the capture probabilities are everywhere constant.
Model fitting proceeds as follows. First we estimate the capture probability using the optimiser in stan
# do some model selection summaryMods <- function(lst, m0 = NULL) { aics <- sapply(lst, "[[", "aic") bics <- sapply(lst, BIC) tab <- data.frame( forms = sapply(lst, function(x) paste(deparse(x$formula, width.cutoff = 500L))), aic = aics, bic = bics ) if (!is.null(m0)) { tab $ Daic <- tab $ aic - AIC(m0) tab $ Dbic <- tab $ bic - BIC(m0) } tab <- tab[order(aics),] unique(tab) } getModels <- function(vars, n) { if (n > length(vars)) stop("n too big for number of variable") out <- do.call(expand.grid, lapply(1:n, function(i) 1:length(vars))) out <- unique(t(apply(out, 1, sort))) out <- out[apply(out, 1, function(x) !any(table(x)>1)),,drop=FALSE] if (n > 1) { apply(out, 1, function(x) paste(vars[x], collapse = " + ")) } else { vars[out] } }
we use the function efp
which stands for electrofishing p to get the maximum likelihood estimates of $p$ given our model for $p$ and a full model for lambda, in this case it is one $p$ for everything.
m0 <- efp(X ~ 1, data = ef, passes = "Runs") AIC(m0)
For brevity will focus on a few effects. We know from preliminary analysis that space has a strong effect
f1s <- c("Trust", "HAName", "DESCRIPTIO", "factor(CATCH_ID)", "poly(Water_W, 1)", "poly(Elevation_, 1)", "factor(year)", "s(year, k=3)", "poly(Area, 1)", "factor(RIVCLASS)", "factor(HC)", "Species", "LifeStage", "Species*LifeStage" ) forms <- lapply(getModels(f1s, 1), function(x) as.formula(paste0("X ~ ", x))) mods <- lapply(forms, efp, data = ef, passes = "Runs")
m0 <- efp(X ~ 1, data = ef, passes = "Runs") summaryMods(mods, m0 = m0)
m1 <- efp(X ~ factor(CATCH_ID), data = ef, passes = "Runs") # remove zeros wk <- subset(ef, T > 0) wk $ p <- m1 $ fitted wk <- unique(wk[c("p", "CATCH_ID")]) wk <- wk[order(wk $ CATCH_ID),] rownames(wk) <- paste(wk $ CATCH_ID) redctm <- CLdata::redctm redctm $ p <- wk[paste(redctm $ CATCH_ID),] $ p spplot(redctm, "p", cuts = 8, col.regions=heat.colors(9)) # the range of capture probabilities lattice::histogram(~p, data = wk)
Now we will try and add additional effects
f1s <- c("Trust", "HAName", "DESCRIPTIO", "poly(Water_W, 1)", "poly(Elevation_, 1)", "factor(year)", "s(year, k=3)", "poly(Area, 1)", "Species", "LifeStage", "Species*LifeStage", "poly(Distance_s,1)" ) forms <- lapply(getModels(f1s, 1), function(x) as.formula(paste0("X ~ factor(CATCH_ID) + ", x))) m0 <- efp(X ~ factor(CATCH_ID), data = ef, passes = "Runs") mods1 <- lapply(forms, efp, data = ef, passes = "Runs") summaryMods(mods1, m0 = m0) f1s <- c("Trust", "HAName", "DESCRIPTIO", "poly(Water_W, 1)", "poly(Elevation_, 1)", "factor(year)", "s(year, k=3)", "poly(Area, 1)", "poly(Distance_s,1)" ) forms <- lapply(getModels(f1s, 1), function(x) as.formula(paste0("X ~ factor(CATCH_ID) + Species*LifeStage +", x))) m0 <- efp(X ~ factor(CATCH_ID) + Species*LifeStage, data = ef, passes = "Runs") mods2 <- lapply(forms, efp, data = ef, passes = "Runs") summaryMods(mods2, m0 = m0) f1s <- c("HAName", "DESCRIPTIO", "poly(Water_W, 1)", "poly(Elevation_, 1)", "factor(year)", "s(year, k=3)", "poly(Area, 1)", "poly(Distance_s,1)" ) forms <- lapply(getModels(f1s, 1), function(x) as.formula(paste0("X ~ factor(CATCH_ID) + Species*LifeStage + Trust + ", x))) m0 <- efp(X ~ factor(CATCH_ID) + Species*LifeStage + Trust, data = ef, passes = "Runs") mods3 <- lapply(forms, efp, data = ef, passes = "Runs") summaryMods(mods3, m0 = m0) f1s <- c("poly(Water_W, 1)", "poly(Elevation_, 1)", "poly(Area, 1)", "poly(Distance_s,1)" ) forms <- lapply(getModels(f1s, 1), function(x) as.formula(paste0("X ~ factor(CATCH_ID) + Species*LifeStage + Trust + factor(year) + ", x))) m0 <- efp(X ~ factor(CATCH_ID) + Species*LifeStage + Trust + factor(year), data = ef, passes = "Runs") mods4 <- lapply(forms, efp, data = ef, passes = "Runs") summaryMods(mods4, m0 = m0) f1s <- c("poly(Water_W, 1)", "poly(Elevation_, 1)", "poly(Distance_s,1)" ) forms <- lapply(getModels(f1s, 1), function(x) as.formula(paste0("X ~ factor(CATCH_ID) + Species*LifeStage + Trust + factor(year) + poly(Area, 1) + ", x))) m0 <- efp(X ~ factor(CATCH_ID) + Species*LifeStage + Trust + factor(year) + poly(Area, 1), data = ef, passes = "Runs") mods5 <- lapply(forms, efp, data = ef, passes = "Runs") summaryMods(mods5, m0 = m0) f1s <- c("poly(Elevation_, 1)", "poly(Distance_s,1)", "poly(Elevation_, 1) + poly(Distance_s,1)" ) forms <- lapply(getModels(f1s, 1), function(x) as.formula(paste0("X ~ factor(CATCH_ID) + Species*LifeStage + Trust + factor(year) + poly(Area, 1) + poly(Water_W,1) + ", x))) m0 <- efp(X ~ factor(CATCH_ID) + Species*LifeStage + Trust + factor(year) + poly(Area, 1) + poly(Water_W,1), data = ef, passes = "Runs") mods6 <- lapply(forms, efp, data = ef, passes = "Runs") summaryMods(mods6, m0 = m0) # drop out terms one at a time f1s <- c("factor(CATCH_ID)", "Trust", "poly(Water_W, 1)", "factor(year)", "poly(Area, 1)", "Species*LifeStage", "poly(Distance_s,1)" ) m0 <- efp(as.formula(paste0("X ~ ", paste(f1s, collapse = " + "))), data = ef, passes = "Runs", hessian = TRUE) eH <- eigen(m0 $ hessian) rownames(eH $ vectors) <- colnames(m0 $ G) eVs <- zapsmall(eH $ vectors[,which(abs(eH $ values) < 1e-9)]) eVs[rowSums(abs(eVs)) > 0,] # so it is a confounding between trust and catchment rowSums(with(ef0, table(CATCH_ID, Trust)) > 0) TrustFishings <- colSums(with(ef0, table(CATCH_ID, Trust)) > 0) names(TrustFishings)[TrustFishings == 1] with(ef0, table(CATCH_ID, Trust) forms <- lapply(1:length(f1s), function(i) as.formula(paste0("X ~ ", paste(f1s[-i], collapse = " + ")))) modstest <- lapply(forms, efp, data = ef, passes = "Runs") out <- summaryMods(modstest, m0 = m0) out $ forms <- f1s[as.numeric(rownames(out))] out ef0 <- subset(ef, T > 0) g0 <- mgcv::gam(G = m0 $ Gsetup) g0 $ coefficients[] <- m0 $ par g0 $ Vp[] <- m0 $ Vb g0 $ family <- binomial() ef0 $ pfit <- predict(g0, newdata = ef0) # So there are too many parameters becuase there is some redundancy between trust and catch_id #mbest <- efp(X ~ factor(CATCH_ID) + factor(RIVCLASS) + Species*LifeStage + factor(year) + poly(Area, 1), data = ef, passes = "Runs")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.