knitr::opts_chunk$set( cache.path = "cache/simulation/", fig.path = "figure/simulation/", tidy = FALSE, comment = "", message = FALSE, cache = TRUE, eval = FALSE)
Lets begin with a simple simulation. First lets setup a basic data.frame from which to work. This requires the CLdata and lubridate packages to have been installed.
wk <- subset(CLdata::ef, Dataset == "fobs") # restructure wk <- do.call(rbind, lapply(c("S0", "SP", "T0", "TP"), function(x) { out <- wk[c("Site_OBJECTID", "Site.Name", "Dataset", "Date", "Runs", "Area", paste0(x, "_R", 1:6))] names(out) <- gsub(x, "n", names(out)) out $ Species <- if (substring(x, 1, 1) == "S") "Salmon" else "Trout" out $ LifeStage <- if (substring(x, 2, 2) == "0") "Fry" else "Parr" out } )) # drop rows with all NAs wk <- wk[apply(wk[paste0("n_R", 1:6)], 1, function(x) !all(is.na(x))), ] # try using lubridate wk $ Date <- as.POSIXlt(wk $ Date, tz = "GMT", format = "%d/%m/%Y") wk $ year <- lubridate::year(wk $ Date) wk $ doy <- lubridate::yday(wk $ Date) # add in data summaries wk $ Tobs <- rowSums(wk[c("n_R1", "n_R2", "n_R3")]) wk $ Xobs <- with(wk, 2*n_R1 + n_R2) wk $ Zobs <- wk $ Xobs / wk $ Tobs # trim data wk <- subset(wk, doy > 150 & doy < 325 & year >= 1997 & year <= 2013 & Runs == 3 & Tobs > 0) head(wk)
test <- paste("This provides", nrow(wk), "observations to play with. Okay lets simulate some 3 pass data using a constant capture probabilty, but try and get similar values for the total number of fish observed.") cat(test)
# choose model parameter to give p of 0.6 alpha <- log(0.6/0.4) X <- model.matrix( ~ 1, data = wk) wk $ p <- (1 + exp(-X %*% alpha))^-1 wk $ N <- round(wk $ Tobs / (1 - (1-wk $ p)^3)) n1 <- rbinom(nrow(wk), wk $ N, wk $ p) n2 <- rbinom(nrow(wk), wk $ N - n1, wk $ p) n3 <- rbinom(nrow(wk), wk $ N - n1 - n2, wk $ p) wk $ T <- n1 + n2 + n3 wk $ X <- 2 * n1 + n2 wk $ Z <- wk $ X / wk $ T
now try to fit the model and see what we get
m0 <- efp(X ~ 1, data = wk, passes = "Runs") CI <- with(m0, rep(coefficients, each=2) + outer(c(-2, 2), sqrt(diag(m0 $ Vb))))
Lets try doing this 100 times and store the results of CI
CIsim <- sapply(1:100, function(i) { n1 <- rbinom(nrow(wk), wk $ N, wk $ p) n2 <- rbinom(nrow(wk), wk $ N - n1, wk $ p) n3 <- rbinom(nrow(wk), wk $ N - n1 - n2, wk $ p) wk $ T <- n1 + n2 + n3 wk $ X <- 2 * n1 + n2 wk $ Z <- wk $ X / wk $ T m0 <- efp(X ~ 1, data = wk, passes = "Runs") CI <- with(m0, rep(coefficients, each=2) + outer(c(-2, 2), sqrt(diag(m0 $ Vb)))) })
plot(0,0, type="n", ylim = c(0, ncol(CIsim)), xlim = range(CIsim), ylab = "", xlab = "parameter estimate", las = 1, axes = FALSE) box(bty = "o"); axis(1) abline(v = alpha, lwd = 2, col = "red") segments(CIsim[1,], 1:ncol(CIsim), CIsim[2,]) inCI <- CIsim[1,] < alpha & CIsim[2,] > alpha title(main=paste("coverage =", round(mean(inCI),3)))
So it performs well for the simplest of models. Lets try adding in some covariate effects. One to start is the Species covariate.
# choose model parameter to give p of 0.6 X <- model.matrix( ~ Species - 1, data = wk) alpha <- c(0, 0.4) wk $ p <- (1 + exp(-X %*% alpha))^-1 wk $ N <- round(wk $ Tobs / (1 - (1-wk $ p)^3)) CIsim <- sapply(1:100, function(i) { n1 <- rbinom(nrow(wk), wk $ N, wk $ p) n2 <- rbinom(nrow(wk), wk $ N - n1, wk $ p) n3 <- rbinom(nrow(wk), wk $ N - n1 - n2, wk $ p) wk $ T <- n1 + n2 + n3 wk $ X <- 2 * n1 + n2 wk $ Z <- wk $ X / wk $ T m0 <- efp(X ~ Species, data = wk, passes = "Runs") CI <- with(m0, rep(coefficients, each=2) + outer(c(-2, 2), sqrt(diag(m0 $ Vb)))) })
Now we have two sets of confidence intervals... Lets summarise the coverage for each variable seperately
inCI <- CIsim[c(1,3),] < alpha & CIsim[c(2,4),] > alpha out <- rowMeans(inCI) data.frame(alpha1 = out[1], alpha2 = out[2])
So far so good! Lets try something more complex again...
# choose model parameter to give p of 0.6 X <- model.matrix( ~ Species*LifeStage, data = wk) alpha <- c(0, 0.2, 0.3, 0.1) wk $ p <- (1 + exp(-X %*% alpha))^-1 wk $ N <- round(wk $ Tobs / (1 - (1-wk $ p)^3)) CIsim <- sapply(1:100, function(i) { n1 <- rbinom(nrow(wk), wk $ N, wk $ p) n2 <- rbinom(nrow(wk), wk $ N - n1, wk $ p) n3 <- rbinom(nrow(wk), wk $ N - n1 - n2, wk $ p) wk $ T <- n1 + n2 + n3 wk $ X <- 2 * n1 + n2 wk $ Z <- wk $ X / wk $ T m0 <- efp(X ~ Species*LifeStage, data = wk, passes = "Runs") CI <- with(m0, rep(coefficients, each=2) + outer(c(-2, 2), sqrt(diag(m0 $ Vb)))) })
inCI <- CIsim[seq(1, nrow(CIsim), by=2),] < alpha & CIsim[seq(2, nrow(CIsim), by=2),] > alpha out <- rowMeans(inCI) names(out) <- paste("alpha", 1:length(out)) out
Excellent performance so far :).
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.