##' Read in a replicate of data simulated by FisherySim.jl
##'
##' @title Read simulated data
##' @param study Simulation study name
##' @param repl Replicate number
##' @param opmod Operating model
##' @param root_dir Directory to work in; defaults to current working directory
##' @param feather If \code{TRUE} (the default), read the Feather catch file;
##'   otherwise read the CSV
##' @return A \code{tibble} with catch observations
##' @author John Best
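##'
##' @examples
##' \dontrun{
##' ## Hypothetical usage; the replicate and operating model names here are
##' ## placeholders for the FisherySim.jl output that is expected on disk
##' catch_df <- read_catch("qdevscaling", 1, "combo")
##' attr(catch_df, "T")
##' }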
##' @importFrom dplyr %>%
##' @export
read_catch <- function(study, repl, opmod, root_dir = ".", feather = TRUE) {
  if (feather) {
    flnm <- sim_file_paths(study, repl, opmod, root_dir)$catch_feather
    catch_df <- arrow::read_feather(flnm)
  } else {
    flnm <- sim_file_paths(study, repl, opmod, root_dir)$catch_csv
    catch_df <- readr::read_csv(flnm,
                                col_types = readr::cols(
                                  time = readr::col_integer(),
                                  vessel_idx = readr::col_integer(),
                                  loc_idx = readr::col_integer(),
                                  coord1 = readr::col_double(),
                                  coord2 = readr::col_double(),
                                  effort = readr::col_double(),
                                  catch_biomass = readr::col_double()))
  }
  catch_df <- catch_df %>%
    dplyr::rename(year = time,
                  s1 = coord1,
                  s2 = coord2) %>%
    dplyr::mutate(vessel = factor(vessel_idx,
                                  labels = c("Survey", "Commercial")))
  ## Count the number of years and store as an attribute for later use in
  ## constructing projection matrices etc.
  attr(catch_df, "T") <- length(unique(catch_df$year))
  catch_df
}
##' Randomly choose a subset of observations for some or all vessels etc.
##'
##' Takes subsets of observations by year and other characteristics.
##' Subsetting is specified as a data frame; any group not listed there keeps
##' all of its observations.
##' @title Subsample catch by year
##' @param catch_df Catch data frame to subset
##' @param n_df Data frame specifying how to subset; must include a column
##'   \code{n}
##' @return A data frame with the same columns, subset as specified
##' @author John Best
##'
##' @examples
##' \dontrun{
##' ## To subset the observations of a single vessel, only specify a number of
##' ## observations for that vessel
##' subsample_catch(catch_df,
##'                 tibble(vessel_idx = 2,
##'                        n = 1000))
##' ## To subset the observations for two vessels, specify a number of
##' ## observations for each
##' subsample_catch(catch_df,
##'                 tibble(vessel_idx = c(1, 2),
##'                        n = c(200, 1000)))
##' }
##' @importFrom dplyr %>%
##' @export
subsample_catch <- function(catch_df, n_df = NULL) {
  if (is.null(n_df)) return(catch_df)
  nms <- names(n_df)
  if (!("n" %in% nms)) stop("Need column `n`")
  nms_join <- nms[nms != "n"]
  catch_df <- catch_df %>%
    tidyr::nest(data = c(-vessel_idx, -year)) %>%
    dplyr::left_join(n_df, by = nms_join) %>%
    ## Groups without a specified `n` keep all of their observations
    dplyr::mutate(n = dplyr::coalesce(n, purrr::map_dbl(data, nrow))) %>%
    dplyr::mutate(data = purrr::map2(data, n, dplyr::sample_n)) %>%
    dplyr::select(-n) %>%
    tidyr::unnest(data)
  ## Recompute the number of years in case a year was dropped entirely
  attr(catch_df, "T") <- length(unique(catch_df$year))
  catch_df
}
##' Read true population total from file
##'
##' @title Read true population state for each year
##' @param study Study name, e.g. "qdevscaling"
##' @param repl Replicate number
##' @param opmod Operating model
##' @param root_dir Directory with e.g. \code{repl_01} subdirectory that
##' contains \code{popstate_*.csv}
##' @param filetype Which popstate file to read? Can be \code{"feather"},
##' \code{"csv"}, or \code{"h5"}
##' @return A \code{tibble} with population and year, starting from 1
##' @author John Best
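##'
##' @examples
##' \dontrun{
##' ## Hypothetical usage; the replicate and operating model names are
##' ## placeholders for simulation output saved under the current directory
##' pop_df <- read_popstate("qdevscaling", 1, "combo", filetype = "feather")
##' }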
##' @importFrom dplyr %>%
##' @export
read_popstate <- function(study, repl, opmod, root_dir = ".",
                          filetype = c("feather", "csv", "h5")) {
  ## Validate the file type, defaulting to the first option
  filetype <- match.arg(filetype)
  if (filetype == "feather") {
    flnm <- sim_file_paths(study, repl, opmod, root_dir)$pop_feather
    pop <- arrow::read_feather(flnm) %>%
      dplyr::mutate(year = seq_along(pop))
  } else if (filetype == "csv") {
    flnm <- sim_file_paths(study, repl, opmod, root_dir)$pop_csv
    pop <- readr::read_csv(flnm,
                           col_types = readr::cols(pop = readr::col_double())) %>%
      dplyr::mutate(year = seq_along(pop))
  } else if (filetype == "h5") {
    flnm <- sim_file_paths(study, repl, opmod, root_dir)$pop_h5
    h5 <- hdf5r::h5file(flnm, mode = "r")
    pop <- h5[["popstate"]]$read()
    ## Close the file handle rather than leaving it open
    h5$close_all()
  }
  pop
}
##' Parse coordinates that were saved as a tuple of \code{Float64} and read in
##' as a string. Much slower than using \code{get_coordref} and doing a
##' \code{join}.
##'
##' @title Parse a tuple of numbers read as a string
##' @param coord_tuple A string of the form \code{"(1.0, 2.0)"}
##' @return The two numbers in the tuple
##' @author John Best
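##'
##' @examples
##' ## Returns the numeric vector c(1, 2)
##' parse_coords("(1.0, 2.0)")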
##' @export
parse_coords <- function(coord_tuple) {
  ## Strip the surrounding parentheses, then split on ", "
  coord_str <- stringr::str_sub(coord_tuple, start = 2L, end = -2L)
  coords <- as.numeric(stringr::str_split_fixed(coord_str, ", ", 2))
  coords
}
##' Get the table of location indices and corresponding location coordinates
##'
##' Assumes that the table is in the file \code{coordref.csv} in
##' \code{root_dir}, as saved by \code{FisherySim.jl}
##' @title Read the index to coordinate reference table
##' @param root_dir Directory containing the file "coordref.csv"
##' @return A \code{tibble} with columns \code{loc_idx}, \code{s1}, and
##' \code{s2}.
##' @author John Best
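##'
##' @examples
##' \dontrun{
##' ## Assumes a `coordref.csv` written by FisherySim.jl is present in the
##' ## current working directory
##' coordref <- get_coordref(".")
##' }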
##' @export
get_coordref <- function(root_dir = ".") {
  cr_file <- normalizePath(file.path(root_dir, "coordref.csv"), mustWork = TRUE)
  readr::read_csv(cr_file,
                  col_types = readr::cols(
                    loc_idx = readr::col_integer(),
                    s1 = readr::col_double(),
                    s2 = readr::col_double()))
}
##' Create a data frame with a regular grid over the spatial domain for each
##' year of the index.
##'
##' @title Create a data frame with index locations and times
##' @param step Grid step size in the spatial domain
##' @param T Number of years
##' @return A \code{tibble} with columns \code{s1}, \code{s2}, and \code{time}
##' and attribute \code{step} for calculating the integration weights.
##' @author John Best
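##'
##' @examples
##' \dontrun{
##' ## A five-year index grid with unit spacing; relies on the package's
##' ## internal `loc_grid` helper for the spatial locations
##' index_df <- create_index_df(step = 1.0, T = 5)
##' attr(index_df, "step")
##' }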
##' @importFrom dplyr %>%
##' @export
create_index_df <- function(step = 1.0, T = 25) {
  loc_df <- tibble::as_tibble(loc_grid(step)) %>%
    dplyr::mutate(loc_idx = seq_along(s1))
  index_df <- purrr::cross_df(list(loc_idx = loc_df$loc_idx,
                                   year = seq_len(T))) %>%
    dplyr::full_join(loc_df, by = "loc_idx") %>%
    dplyr::select(-loc_idx)
  attr(index_df, "step") <- step
  index_df
}
##' Create a list of all data elements required to fit a \code{spatq} model with
##' names etc.
##'
##' @title Prepare the \code{data} argument for the \code{spatq} model
##' @param catch_df A data frame with catch observations, as read in by
##' \code{\link{read_catch}}
##' @param index_df A data frame with coordinates for index calculation, as
##' generated by \code{\link{create_index_df}}
##' @param mesh A FEM mesh for the domain
##' @param fem The sparse FEM matrices from the \code{mesh} as generated by
##' \code{generate_fem}
##' @param obs_lik Observation likelihood. Current options are \code{0}:
##' Poisson-link log-normal and \code{1}: Tweedie
##' @param X_contr Contrasts to use for fixed effect design matrix
##' @return A \code{list} with the data values for a \code{spatq} model
##' @author John Best
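##'
##' @examples
##' \dontrun{
##' ## A sketch of the intended workflow. `generate_fem` is referenced above;
##' ## the mesh constructor shown here and the replicate and operating model
##' ## names are placeholders
##' catch_df <- read_catch("qdevscaling", 1, "combo")
##' index_df <- create_index_df(step = 1.0, T = attr(catch_df, "T"))
##' mesh <- generate_mesh()
##' fem <- generate_fem(mesh)
##' dat <- prepare_data(catch_df, index_df, mesh, fem, obs_lik = 0L)
##' }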
##' @importFrom stats contr.helmert
##' @export
prepare_data <- function(catch_df, index_df, mesh, fem,
                         obs_lik = 0L,
                         X_contr = contr.helmert) {
  T <- length(unique(catch_df$year))
  ## Convert `time` to a factor so different contrasts can be set easily.
  ## Don't overwrite `time` because later calls to `generate_projection` need
  ## it to be numeric.
  catch_df$ftime <- factor(catch_df$year)
  index_df$ftime <- factor(index_df$year)
  ## If single-vessel and no covariates, there is nothing to estimate for
  ## `lambda_n` or `lambda_w`. Check the length of the unique column contents
  ## rather than `levels` in case the column is already a factor that retains
  ## all original vessels as levels even when none are present. Useful e.g.
  ## when estimating an index from a single survey vessel.
  if (length(unique(catch_df$vessel_idx)) >= 2) {
    ## Catchability fixed effect design matrix. Drop the first column so that
    ## fixed effects are identifiable (e.g. if only two vessels, only adjust
    ## catchability for the second). Use `drop = FALSE` so the result is a
    ## matrix even if only one column is left.
    R_n <- stats::model.matrix(~ factor(vessel_idx),
                               data = catch_df)[, -1, drop = FALSE]
    R_w <- stats::model.matrix(~ factor(vessel_idx),
                               data = catch_df)[, -1, drop = FALSE]
  } else {
    R_n <- matrix(0, nrow = nrow(catch_df))
    R_w <- matrix(0, nrow = nrow(catch_df))
  }
  dat <- list(catch_obs = catch_df$catch_biomass,
              area_swept = catch_df$effort,
              ## Abundance fixed effect design matrices, with `time` as a
              ## factor (`ftime`)
              X_n = stats::model.matrix(~ ftime, data = catch_df,
                                        contrasts.arg = list(ftime = X_contr)),
              X_w = stats::model.matrix(~ ftime, data = catch_df,
                                        contrasts.arg = list(ftime = X_contr)),
              IX_n = stats::model.matrix(~ ftime, data = index_df,
                                         contrasts.arg = list(ftime = X_contr)),
              IX_w = stats::model.matrix(~ ftime, data = index_df,
                                         contrasts.arg = list(ftime = X_contr)),
              ## X_n = stats::model.matrix(~ 0 + ftime, data = catch_df),
              ## X_w = stats::model.matrix(~ 0 + ftime, data = catch_df),
              ## IX_n = stats::model.matrix(~ 0 + ftime, data = index_df),
              ## IX_w = stats::model.matrix(~ 0 + ftime, data = index_df),
              ## Abundance random effect design matrices
              Z_n = matrix(0, nrow = nrow(catch_df), ncol = 1),
              Z_w = matrix(0, nrow = nrow(catch_df), ncol = 1),
              IZ_n = matrix(0, nrow = nrow(index_df), ncol = 1),
              IZ_w = matrix(0, nrow = nrow(index_df), ncol = 1),
              ## Abundance projection matrices
              A_spat = generate_projection(mesh, catch_df),
              A_sptemp = generate_projection(mesh, catch_df,
                                             group = catch_df$year),
              IA_spat = generate_projection(mesh, index_df),
              IA_sptemp = generate_projection(mesh, index_df,
                                              group = index_df$year),
              ## Integration weights
              Ih = rep_len(attr(index_df, "step")^2, nrow(index_df)),
              ## Catchability fixed effect design matrices. See above for
              ## their specification.
              R_n = R_n,
              R_w = R_w,
              ## Catchability random effect design matrices
              V_n = matrix(0.0, nrow = nrow(catch_df), ncol = 1),
              V_w = matrix(0.0, nrow = nrow(catch_df), ncol = 1),
              ## Catchability projection matrices
              A_qspat = generate_projection(mesh, catch_df, vessel_idx = 2),
              A_qsptemp = generate_projection(mesh, catch_df,
                                              vessel_idx = 2,
                                              group = catch_df$year),
              ## FEM matrices for spatial/spatiotemporal effects
              spde = fem,
              ## Switches for the random effect likelihood components. Start
              ## with all switched on; they are turned off later to match the
              ## `map`
              proc_switch = rep(TRUE, 12),
              ## Choose observation likelihood
              ## 0: Poisson-link zero-inflated log-normal
              ## 1: Tweedie
              obs_lik = obs_lik,
              ## Normalize GMRFs externally, and include a flag for returning
              ## the negative log-likelihood without the data likelihood
              norm_flag = FALSE,
              incl_data = TRUE)
  ## Store the number of years as an attribute
  attr(dat, "T") <- T
  dat
}
##' Generate an (all-zero) parameter starting value of appropriate dimension
##' from the corresponding data element
##'
##' @title Parameter values from data elements
##' @param data_el Data element to take dimensions from
##' @param T Number of years
##' @return A number, vector, or matrix of zeros of appropriate dimension and
##' type for that parameter of the \code{spatq} model
##' @author John Best
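##'
##' @examples
##' ## A design matrix with three columns yields three zero starting values
##' pars_data(matrix(0, nrow = 10, ncol = 3))
##' ## A vector data element yields a single scalar starting value
##' pars_data(rep(1, 10))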
##' @export
pars_data <- function(data_el, T = 1) {
  if (inherits(data_el, "inla.mesh")) {
    ## Mesh: one value per vertex, replicated for each of the T years
    n <- data_el$n
    par_el <- rep_len(0.0, n * T)
  } else if (is.null(dim(data_el))) {
    ## Vector data element: a single scalar parameter
    par_el <- 0.0
  } else {
    ## Matrix data element: one parameter per column
    par_el <- rep_len(0.0, ncol(data_el))
  }
  par_el
}
##' Generates a logical vector of length 12 indicating whether each random
##' process (the numbers density and weight component of each process group)
##' should be included in the likelihood calculation; processes not included
##' are map'd off.
##'
##' @title Prepare process switch
##' @param random Character vector of random parameters; as from
##' \code{prepare_random}
##' @return Logical vector of length 12 indicating which random processes are
##' *not* map'd off
##' @author John Best
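##'
##' @examples
##' \dontrun{
##' ## Spatial processes only: positions 3 and 4 are TRUE, the rest FALSE
##' prepare_proc_switch(c("omega_n", "omega_w"))
##' }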
prepare_proc_switch <- function(random) {
  procs <- c("gamma_n", "gamma_w", "omega_n", "omega_w",
             "epsilon_n", "epsilon_w", "eta_n", "eta_w",
             "phi_n", "phi_w", "psi_n", "psi_w")
  procs %in% random
}